wactorz
Wactorz - Actor-Model Multi-Agent Framework
"""Wactorz - Actor-Model Multi-Agent Framework"""
from ._version import __version__
from .core.actor import Actor, ActorState, Message, MessageType
from .core.registry import ActorSystem, ActorRegistry

# Names that are always exported, regardless of which optional extras are installed.
__all__ = [
    "__version__",
    "Actor", "ActorState", "Message", "MessageType",
    "ActorSystem", "ActorRegistry",
]

# Optional agents — only exported when their dependencies are available.
try:
    from .agents.llm_agent import LLMAgent, AnthropicProvider, OpenAIProvider, OllamaProvider, NIMProvider
    __all__ += ["LLMAgent", "AnthropicProvider", "OpenAIProvider", "OllamaProvider", "NIMProvider"]
except ImportError:
    pass

try:
    from .agents.main_actor import MainActor
    from .agents.monitor_agent import MonitorActor
    from .agents.manual_agent import ManualAgent
    from .agents.planner_agent import PlannerAgent
    from .agents.dynamic_agent import DynamicAgent
    from .agents.installer_agent import InstallerAgent
    from .agents.catalog_agent import CatalogAgent
    # Every name listed here must actually be bound in this module: a name in
    # __all__ that was never imported makes `from wactorz import *` raise
    # AttributeError. "CodeAgent" was listed without a matching import, so it
    # is no longer exported.
    __all__ += ["MainActor", "MonitorActor", "ManualAgent", "PlannerAgent",
                "DynamicAgent", "InstallerAgent", "CatalogAgent"]
except ImportError:
    pass

#try:
#    from .agents.ml_agent import MLAgent, YOLOAgent, AnomalyDetectorAgent
#    __all__ += ["MLAgent", "YOLOAgent", "AnomalyDetectorAgent"]
#except ImportError:
#    pass

try:
    from .agents.home_assistant_hardware_agent import HomeAssistantHardwareAgent
    __all__ += ["HomeAssistantHardwareAgent"]
except ImportError:
    pass
class Actor(ABC):
    """
    Base Actor class. All agents inherit from this.
    Actors are fully async and communicate only through messages.

    Subclasses must implement ``handle_message`` and may override the
    ``on_start`` / ``on_stop`` hooks and ``_current_task_description``.
    """

    # Message types that are dispatched but not counted in
    # ``metrics.messages_processed`` (heartbeats, status pings, lifecycle).
    _NOISE_TYPES = frozenset({
        MessageType.HEARTBEAT, MessageType.STATUS_REQUEST,
        MessageType.STATUS_RESPONSE, MessageType.STOP,
        MessageType.PAUSE, MessageType.RESUME,
    })

    def __init__(
        self,
        actor_id: Optional[str] = None,
        name: Optional[str] = None,
        persistence_dir: str = "./actor_state",
        mailbox_size: int = 1000,
    ):
        """
        Create the actor and its on-disk persistence folder.

        Args:
            actor_id: Explicit id. When omitted, the id is derived from ``name``
                (deterministically) or generated randomly.
            name: Human-readable name; defaults to ``actor-<first 8 id chars>``.
            persistence_dir: Root directory; state lives in a sub-folder named
                after the actor.
            mailbox_size: Maximum number of queued inbound messages.
        """
        if actor_id:
            self.actor_id = actor_id
        elif name:
            # Deterministic UUID from name — same name always gets same ID across restarts
            self.actor_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"agentflow.actor.{name}"))
        else:
            self.actor_id = str(uuid.uuid4())
        self.name = name or f"actor-{self.actor_id[:8]}"
        self.state = ActorState.IDLE
        self.metrics = ActorMetrics()

        # Async mailbox (inbox)
        self._mailbox: asyncio.Queue = asyncio.Queue(maxsize=mailbox_size)
        self._outbox: dict[str, asyncio.Queue] = {}  # actor_id -> queue ref

        # Registry reference (set by ActorSystem)
        self._registry: Optional["ActorRegistry"] = None
        self._mqtt_client: Optional[Any] = None
        self._mqtt_broker: str = "localhost"
        self._mqtt_port: int = 1883

        # Persistence
        # Use name as persistence folder so it survives restarts with same name
        # Falls back to actor_id for anonymous actors
        safe_name = self.name.replace("/", "_").replace("\\", "_")
        self._persistence_dir = Path(persistence_dir) / safe_name
        self._persistence_dir.mkdir(parents=True, exist_ok=True)
        self._persistent_state: dict = {}

        # Protection — if True, stop/delete/pause commands are ignored
        self.protected: bool = False

        # Supervisor reference — set by Supervisor when this actor is registered under it
        self.supervisor_id: Optional[str] = None

        # Handlers
        self._handlers: dict[MessageType, Callable] = {}
        self._setup_default_handlers()

        # Background tasks
        self._tasks: list[asyncio.Task] = []

        logger.info(f"[{self.name}] Actor created with id={self.actor_id}")

    # ─── Lifecycle ────────────────────────────────────────────────────────────

    async def start(self):
        """Start the actor's event loop."""
        self.state = ActorState.RUNNING
        self.metrics.start_time = time.time()
        await self._load_persistent_state()
        await self.on_start()
        self._tasks.append(asyncio.create_task(self._message_loop()))
        self._tasks.append(asyncio.create_task(self._heartbeat_loop()))
        self._tasks.append(asyncio.create_task(self._command_listener()))
        await self._publish_status()
        logger.info(f"[{self.name}] Actor started.")

    async def stop(self):
        """
        Gracefully stop the actor.

        Marks the actor STOPPED, cancels its background tasks, runs the
        ``on_stop`` hook, saves persistent state and publishes the final status.
        """
        self.state = ActorState.STOPPED
        # stop() is reached from inside the message loop (STOP message) and
        # from inside the command listener (MQTT "stop"/"delete"). Cancelling
        # the task we are currently running in would raise CancelledError at
        # the next suspending await below and abort the shutdown half-way.
        # Both loops exit on their own once the state is STOPPED.
        current = asyncio.current_task()
        for task in self._tasks:
            if task is not current:
                task.cancel()
        await self.on_stop()                 # hook may call persist() first
        await self._save_persistent_state()  # THEN save to disk
        await self._publish_status()
        logger.info(f"[{self.name}] Actor stopped.")

    async def pause(self):
        """Switch to PAUSED and publish the new status."""
        self.state = ActorState.PAUSED
        await self._publish_status()

    async def resume(self):
        """Switch back to RUNNING and publish the new status."""
        self.state = ActorState.RUNNING
        await self._publish_status()

    # ─── Message Loop ─────────────────────────────────────────────────────────

    async def _message_loop(self):
        """Main message processing loop; runs until the actor is STOPPED or FAILED."""
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                if self.state == ActorState.PAUSED:
                    await asyncio.sleep(0.1)
                    continue

                # Bounded wait so the loop re-checks the state once a second.
                msg = await asyncio.wait_for(self._mailbox.get(), timeout=1.0)
                # Only count meaningful messages — not heartbeats, status pings, lifecycle
                if msg.type not in self._NOISE_TYPES:
                    self.metrics.messages_processed += 1
                try:
                    await self._dispatch(msg)
                finally:
                    # Balance the get() even when the handler raises, so
                    # Queue.join() cannot hang on a failed message.
                    self._mailbox.task_done()

            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.metrics.errors += 1
                logger.error(f"[{self.name}] Error in message loop: {e}", exc_info=True)

    async def _dispatch(self, msg: Message):
        """Dispatch message to the appropriate handler."""
        handler = self._handlers.get(msg.type)
        if handler:
            await handler(msg)
        else:
            await self.handle_message(msg)

    def _setup_default_handlers(self):
        """Install the built-in handlers for lifecycle, status and heartbeat messages."""
        self._handlers = {
            MessageType.STOP: self._handle_stop,
            MessageType.PAUSE: self._handle_pause,
            MessageType.RESUME: self._handle_resume,
            MessageType.STATUS_REQUEST: self._handle_status_request,
            MessageType.HEARTBEAT: self._handle_heartbeat_msg,
        }

    async def _handle_stop(self, msg: Message):
        await self.stop()

    async def _handle_pause(self, msg: Message):
        await self.pause()

    async def _handle_resume(self, msg: Message):
        await self.resume()

    async def _handle_status_request(self, msg: Message):
        """Answer a status request with this actor's ``get_status()`` dict."""
        status = self.get_status()
        # reply_to, when set, overrides the sender as the reply target
        target = msg.reply_to or msg.sender_id
        if target:
            await self.send(target, MessageType.STATUS_RESPONSE, status)

    async def _handle_heartbeat_msg(self, msg: Message):
        pass  # Monitor actor handles these

    # ─── Heartbeat ────────────────────────────────────────────────────────────

    async def _heartbeat_loop(self, interval: float = 10.0):
        """Periodically publish heartbeat and metrics via MQTT."""
        # Publish immediately on start so monitor sees agent right away
        await asyncio.sleep(0.5)
        await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", self._build_heartbeat())
        await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                await asyncio.sleep(interval)
                hb = self._build_heartbeat()
                self.metrics.last_heartbeat = time.time()
                await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", hb)
                await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.warning(f"[{self.name}] Heartbeat error: {e}")

    def _build_heartbeat(self) -> dict:
        """Build the heartbeat payload (identity, state, process CPU/memory, current task)."""
        proc = psutil.Process()
        return {
            "actor_id": self.actor_id,
            "name": self.name,
            "timestamp": time.time(),
            "state": self.state.value,
            # NOTE(review): a positive interval makes this a synchronous wait,
            # and this method is called from coroutines — confirm the stall is
            # acceptable.
            "cpu": proc.cpu_percent(interval=0.1),
            "memory_mb": proc.memory_info().rss / 1024 / 1024,
            "task": self._current_task_description(),
            "protected": self.protected,
        }

    def _build_metrics(self) -> dict:
        """Build the metrics payload from ``self.metrics``."""
        return {
            "actor_id": self.actor_id,
            "messages_processed": self.metrics.messages_processed,
            "errors": self.metrics.errors,
            "uptime": self.metrics.uptime,
            "tasks_completed": self.metrics.tasks_completed,
            "tasks_failed": self.metrics.tasks_failed,
            "restart_count": self.metrics.restart_count,
        }

    async def _command_listener(self):
        """
        Listen for commands published to agents/{id}/commands via MQTT.

        Understands ``stop``, ``pause``, ``resume`` and ``delete``. Returns
        immediately when ``aiomqtt`` is not installed. On a connection error it
        retries after 5 seconds for as long as the actor is alive.
        """
        try:
            import aiomqtt
        except ImportError:
            return

        topic = f"agents/{self.actor_id}/commands"
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
                    await client.subscribe(topic)
                    logger.debug(f"[{self.name}] Subscribed to {topic}")
                    async for message in client.messages:
                        try:
                            data = json.loads(message.payload.decode())
                            command = data.get("command", "")
                            logger.info(f"[{self.name}] Received command: {command}")
                            if self.protected and command in ("stop", "pause", "delete"):
                                logger.warning(f"[{self.name}] Ignoring '{command}' — actor is protected.")
                                continue
                            if command == "stop":
                                await self.stop()
                                return
                            elif command == "pause":
                                await self.pause()
                            elif command == "resume":
                                await self.resume()
                            elif command == "delete":
                                # If main actor knows about this agent, remove from spawn registry.
                                # Explicit None check: the registry defines __len__, so an
                                # empty registry would be falsy.
                                if self._registry is not None:
                                    main = self._registry.find_by_name("main")
                                    if main and hasattr(main, "_remove_from_spawn_registry"):
                                        main._remove_from_spawn_registry(self.name)
                                    await self._registry.unregister(self.actor_id)
                                await self.stop()
                                return
                        except Exception as e:
                            logger.error(f"[{self.name}] Command parse error: {e}")
            except asyncio.CancelledError:
                break
            except Exception as e:
                if self.state not in (ActorState.STOPPED, ActorState.FAILED):
                    # Leave a trace instead of retrying silently.
                    logger.debug(f"[{self.name}] Command listener error, retrying in 5s: {e}")
                    await asyncio.sleep(5)

    def _current_task_description(self) -> str:
        return "idle"  # Override in subclasses

    # ─── Messaging ────────────────────────────────────────────────────────────

    async def send(self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
        """Send a message to another actor; returns False when no registry is attached."""
        if self._registry is None:
            logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
            return False
        msg = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
        return await self._registry.deliver(target_id, msg)

    async def broadcast(self, msg_type: MessageType, payload: Any = None):
        """Broadcast to all registered actors."""
        if self._registry is not None:
            await self._registry.broadcast(self.actor_id, msg_type, payload)

    async def receive(self, msg: Message):
        """External entry point - put message in mailbox (waits while the mailbox is full)."""
        await self._mailbox.put(msg)

    # ─── Actor Spawning ───────────────────────────────────────────────────────

    async def spawn(self, actor_class: type, **kwargs) -> "Actor":
        """
        Spawn a child actor. The child inherits:
        - MQTT client (so it can publish heartbeats/status)
        - Registry (so it can send/receive messages)
        - Persistence dir defaults to same root

        Args:
            actor_class: Class to instantiate with ``**kwargs``.

        Returns:
            The started child actor.
        """
        # Default persistence to same root as parent
        kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))

        child = actor_class(**kwargs)

        # Inherit everything from parent
        child._mqtt_client = self._mqtt_client    # MQTT publish connection
        child._mqtt_broker = self._mqtt_broker    # broker address for command listener
        child._mqtt_port = self._mqtt_port        # broker port
        child._registry = self._registry          # message routing

        # Register in registry. Explicit None check: an empty registry is falsy
        # (it defines __len__) and must not cause registration to be skipped.
        if self._registry is not None:
            await self._registry.register(child)

        # Start the child
        await child.start()

        # Immediately announce to monitor - don't wait for heartbeat loop
        await child._publish_status()
        await child._mqtt_publish(
            f"agents/{child.actor_id}/heartbeat",
            child._build_heartbeat(),
        )
        await child._mqtt_publish(
            f"agents/{child.actor_id}/metrics",
            child._build_metrics(),
        )

        # Notify parent's topic that it spawned a child
        await self._mqtt_publish(
            f"agents/{self.actor_id}/spawned",
            {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
        )
        logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
        return child

    # ─── Persistence ──────────────────────────────────────────────────────────

    def _write_state_file(self):
        """Pickle ``_persistent_state`` to ``state.pkl``; raises on I/O or pickling errors."""
        path = self._persistence_dir / "state.pkl"
        # Write to a sibling temp file and rename over the target so an
        # interrupted write cannot leave a truncated state.pkl behind.
        tmp_path = path.with_name(path.name + ".tmp")
        with open(tmp_path, "wb") as f:
            pickle.dump(self._persistent_state, f)
        tmp_path.replace(path)

    async def _save_persistent_state(self):
        """Save the persistent state to disk; failures are logged, not raised."""
        try:
            self._write_state_file()
        except Exception as e:
            logger.error(f"[{self.name}] Failed to save state: {e}")

    async def _load_persistent_state(self):
        """Load ``state.pkl`` if it exists; failures are logged, not raised."""
        path = self._persistence_dir / "state.pkl"
        if path.exists():
            try:
                with open(path, "rb") as f:
                    # SECURITY: pickle.load executes code embedded in the file;
                    # the persistence directory must only be writable by trusted users.
                    self._persistent_state = pickle.load(f)
                logger.info(f"[{self.name}] Loaded persistent state.")
            except Exception as e:
                logger.error(f"[{self.name}] Failed to load state: {e}")

    def persist(self, key: str, value: Any):
        """Store ``value`` under ``key`` and write the whole state to disk right away."""
        self._persistent_state[key] = value
        # Write to disk immediately so state survives Ctrl+C and crashes.
        # (One write per call; the state used to be pickled twice in a row.)
        try:
            self._write_state_file()
        except Exception as e:
            logger.debug(f"[{self.name}] persist write failed: {e}")

    def recall(self, key: str, default: Any = None) -> Any:
        """Return the persisted value for ``key``, or ``default`` when absent."""
        return self._persistent_state.get(key, default)

    # ─── MQTT ─────────────────────────────────────────────────────────────────

    async def _mqtt_publish(self, topic: str, payload: Any, retain: bool = False, qos: int = 0):
        """Publish ``payload`` as JSON; a no-op without a client, failures are only logged."""
        if self._mqtt_client:
            try:
                await self._mqtt_client.publish(topic, json.dumps(payload), retain=retain, qos=qos)
            except Exception as e:
                logger.debug(f"[{self.name}] MQTT publish failed: {e}")

    async def _publish_status(self):
        await self._mqtt_publish(f"agents/{self.actor_id}/status", self.get_status())

    # ─── Status ───────────────────────────────────────────────────────────────

    def get_status(self) -> dict:
        """Return a snapshot of identity, state and headline metrics."""
        return {
            "actor_id": self.actor_id,
            "name": self.name,
            "state": self.state.value,
            "uptime": self.metrics.uptime,
            "messages_processed": self.metrics.messages_processed,
            "restart_count": self.metrics.restart_count,
            "supervised": self.supervisor_id is not None,
        }

    # ─── Abstract / Override ──────────────────────────────────────────────────

    async def on_start(self):
        """Called when actor starts. Override for init logic."""
        pass

    async def publish_manifest(self, description: str = "", publishes: Optional[list] = None,
                               capabilities: Optional[list] = None, input_schema: Optional[dict] = None,
                               output_schema: Optional[dict] = None):
        """
        Publish a capability manifest so main's topic registry can discover this actor.
        Call from on_start() in any actor that wants to be discoverable.
        Manifests are retained — main sees them immediately even after restart.

        input_schema / output_schema — dicts describing expected payload fields, e.g.:
            input_schema  = {"city": "str — city name to fetch weather for"}
            output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
        """
        manifest = {
            "name": self.name,
            "actor_id": self.actor_id,
            "description": description,
            "publishes": publishes or [],
            "capabilities": capabilities or [],
            "input_schema": input_schema or {},
            "output_schema": output_schema or {},
            "timestamp": time.time(),
        }
        await self._mqtt_publish(f"agents/{self.actor_id}/manifest", manifest, retain=True)

    async def on_stop(self):
        """Called when actor stops. Override for cleanup."""
        pass

    @abstractmethod
    async def handle_message(self, msg: Message):
        """Handle messages not caught by default handlers."""
        pass

    def __repr__(self):
        return f"<Actor name={self.name} id={self.actor_id[:8]} state={self.state.value}>"
Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.
async def start(self):
    """Bring the actor up: restore state, run the start hook, launch the background loops."""
    self.state = ActorState.RUNNING
    self.metrics.start_time = time.time()

    # Saved state is loaded before on_start() runs.
    await self._load_persistent_state()
    await self.on_start()

    # One task per background loop, created in this fixed order.
    background_loops = (self._message_loop, self._heartbeat_loop, self._command_listener)
    for run_loop in background_loops:
        self._tasks.append(asyncio.create_task(run_loop()))

    await self._publish_status()
    logger.info(f"[{self.name}] Actor started.")
Start the actor's event loop.
async def stop(self):
    """
    Gracefully stop the actor.

    Marks the actor STOPPED, cancels its background tasks, runs the
    ``on_stop`` hook, saves persistent state and publishes the final status.
    """
    self.state = ActorState.STOPPED
    # stop() can be invoked from one of the actor's own background tasks
    # (e.g. while handling a STOP message). Cancelling the task we are
    # currently running in would raise CancelledError at the next suspending
    # await below and abort the shutdown half-way, so that task is skipped.
    current = asyncio.current_task()
    for task in self._tasks:
        if task is not current:
            task.cancel()
    await self.on_stop()                 # hook may call persist() first
    await self._save_persistent_state()  # THEN save to disk
    await self._publish_status()
    logger.info(f"[{self.name}] Actor stopped.")
Gracefully stop the actor.
async def send(self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
    """Deliver one message to ``target_id`` through the registry.

    Returns the registry's delivery result, or False when no registry is attached.
    """
    registry = self._registry
    if registry is not None:
        envelope = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
        delivered = await registry.deliver(target_id, envelope)
        return delivered

    logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
    return False
Send a message to another actor.
async def broadcast(self, msg_type: MessageType, payload: Any = None):
    """Ask the registry to broadcast a message on behalf of this actor."""
    registry = self._registry
    if not registry:
        # No (or falsy) registry: nothing to broadcast to.
        return
    await registry.broadcast(self.actor_id, msg_type, payload)
Broadcast to all registered actors.
async def receive(self, msg: Message):
    """Inbound entry point: enqueue ``msg`` on this actor's mailbox."""
    inbox = self._mailbox
    await inbox.put(msg)
External entry point - put message in mailbox.
async def spawn(self, actor_class: type, **kwargs) -> "Actor":
    """
    Spawn a child actor. The child inherits:
    - MQTT client (so it can publish heartbeats/status)
    - Registry (so it can send/receive messages)
    - Persistence dir defaults to same root

    Args:
        actor_class: Class to instantiate with ``**kwargs``.

    Returns:
        The started child actor.
    """
    # Default persistence to same root as parent
    kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))

    child = actor_class(**kwargs)

    # Inherit everything from parent
    child._mqtt_client = self._mqtt_client    # MQTT publish connection
    child._mqtt_broker = self._mqtt_broker    # broker address for command listener
    child._mqtt_port = self._mqtt_port        # broker port
    child._registry = self._registry          # message routing

    # Register in registry. Compare against None explicitly (as send() does):
    # a registry object that defines __len__ is falsy while empty, and a plain
    # truthiness test would then skip registering the child.
    if self._registry is not None:
        await self._registry.register(child)

    # Start the child
    await child.start()

    # Immediately announce to monitor - don't wait for heartbeat loop
    await child._publish_status()
    await child._mqtt_publish(
        f"agents/{child.actor_id}/heartbeat",
        child._build_heartbeat(),
    )
    await child._mqtt_publish(
        f"agents/{child.actor_id}/metrics",
        child._build_metrics(),
    )

    # Notify parent's topic that it spawned a child
    await self._mqtt_publish(
        f"agents/{self.actor_id}/spawned",
        {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
    )
    logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
    return child
Spawn a child actor. The child inherits:
- MQTT client (so it can publish heartbeats/status)
- Registry (so it can send/receive messages)
- Persistence dir defaults to same root
def persist(self, key: str, value: Any):
    """Store ``value`` under ``key`` and write the whole state to disk right away.

    The in-memory state is always updated; a failed disk write is only logged
    at debug level.
    """
    self._persistent_state[key] = value
    # Write to disk immediately so state survives Ctrl+C and crashes.
    # The state is pickled exactly once per call (it used to be written twice
    # in a row), into a sibling temp file that is then renamed over the
    # target, so an interrupted write cannot leave a truncated state.pkl.
    path = self._persistence_dir / "state.pkl"
    tmp_path = path.with_name(path.name + ".tmp")
    try:
        with open(tmp_path, "wb") as f:
            pickle.dump(self._persistent_state, f)
        tmp_path.replace(path)
    except Exception as e:
        logger.debug(f"[{self.name}] persist write failed: {e}")
def get_status(self) -> dict:
    """Return a snapshot of the actor's identity, state and headline metrics."""
    metrics = self.metrics
    is_supervised = self.supervisor_id is not None

    snapshot = dict(
        actor_id=self.actor_id,
        name=self.name,
        state=self.state.value,
        uptime=metrics.uptime,
        messages_processed=metrics.messages_processed,
        restart_count=metrics.restart_count,
        supervised=is_supervised,
    )
    return snapshot
async def publish_manifest(self, description: str = "", publishes: list = None,
                           capabilities: list = None, input_schema: dict = None,
                           output_schema: dict = None):
    """
    Announce this actor's capabilities as a retained MQTT manifest.

    The manifest is published with retain=True on agents/{actor_id}/manifest.
    Intended to be called from on_start() by actors that want to be discoverable.

    input_schema / output_schema — dicts describing expected payload fields, e.g.:
        input_schema  = {"city": "str — city name to fetch weather for"}
        output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
    """
    from time import time as _now

    # Omitted (or otherwise falsy) arguments become fresh empty containers.
    published_topics = publishes if publishes else []
    capability_list = capabilities if capabilities else []
    schema_in = input_schema if input_schema else {}
    schema_out = output_schema if output_schema else {}

    manifest = {
        "name": self.name,
        "actor_id": self.actor_id,
        "description": description,
        "publishes": published_topics,
        "capabilities": capability_list,
        "input_schema": schema_in,
        "output_schema": schema_out,
        "timestamp": _now(),
    }
    manifest_topic = f"agents/{self.actor_id}/manifest"
    await self._mqtt_publish(manifest_topic, manifest, retain=True)
Publish a capability manifest so main's topic registry can discover this actor. Call from on_start() in any actor that wants to be discoverable. Manifests are retained — main sees them immediately even after restart.
input_schema / output_schema — dicts describing expected payload fields, e.g.: input_schema = {"city": "str — city name to fetch weather for"} output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
class ActorState(str, Enum):
    """Lifecycle state of an actor; members are also plain strings (str mixin)."""
    IDLE = "idle"        # state assigned in Actor.__init__
    RUNNING = "running"  # assigned by Actor.start() and Actor.resume()
    PAUSED = "paused"    # assigned by Actor.pause()
    STOPPED = "stopped"  # assigned by Actor.stop(); ends the actor's background loops
    FAILED = "failed"    # also ends the actor's background loops
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
@dataclass
class Message:
    """Envelope exchanged between actors."""

    type: MessageType
    sender_id: str
    payload: Any = None
    reply_to: Optional[str] = None
    # A fresh id and timestamp are generated for every message by default.
    message_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> dict:
        """Return the message as a plain dict, with ``type`` reduced to its string value."""
        as_dict = {"type": self.type.value}
        # Remaining fields are copied verbatim, in declaration order.
        for attr in ("sender_id", "payload", "reply_to", "message_id", "timestamp"):
            as_dict[attr] = getattr(self, attr)
        return as_dict
class MessageType(str, Enum):
    """Kinds of messages actors exchange; members are also plain strings (str mixin)."""
    # Lifecycle
    START = "start"
    STOP = "stop"
    PAUSE = "pause"
    RESUME = "resume"
    DELETE = "delete"
    # Communication
    TASK = "task"
    RESULT = "result"
    HEARTBEAT = "heartbeat"
    SPAWN = "spawn"
    # Internal
    TICK = "tick"
    STATUS_REQUEST = "status_request"
    STATUS_RESPONSE = "status_response"
str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.
class ActorSystem:
    """Top-level orchestrator: owns the registry, the MQTT publisher and the supervisor."""

    def __init__(self, mqtt_broker: str = "localhost", mqtt_port: int = 1883):
        self._mqtt_broker = mqtt_broker
        self._mqtt_port = mqtt_port
        self._mqtt_client = None          # created in start()
        self._running = False
        self._supervisor: Optional[Supervisor] = None  # created on first access
        self.registry = ActorRegistry()

    def _inject(self, actor: Actor):
        """Hand the MQTT client and broker coordinates to ``actor``."""
        actor._mqtt_client = self._mqtt_client
        actor._mqtt_broker, actor._mqtt_port = self._mqtt_broker, self._mqtt_port

    @property
    def supervisor(self) -> Supervisor:
        """The system's Supervisor, built on first access from the registry and ``_inject``."""
        existing = self._supervisor
        if existing is not None:
            return existing
        self._supervisor = Supervisor(self.registry, self._inject)
        return self._supervisor

    async def start(self, *initial_actors: Actor):
        """Connect the MQTT publisher, then inject, register and start each given actor in order."""
        self._running = True
        self._mqtt_client = await _MQTTPublisher.create(self._mqtt_broker, self._mqtt_port)

        for member in initial_actors:
            self._inject(member)
            await self.registry.register(member)
            await member.start()

        logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")

    async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
        """Create an actor from ``actor_class(**kwargs)``, register it and start it."""
        new_actor = actor_class(**kwargs)
        self._inject(new_actor)
        await self.registry.register(new_actor)
        await new_actor.start()
        return new_actor

    async def stop_all(self):
        """Stop the supervisor (if any), every registered actor, then the MQTT client."""
        self._running = False
        # Stop supervisor first so it doesn't try to restart actors we're about to stop
        if self._supervisor:
            await self._supervisor.stop()
        # Actors are stopped concurrently; exceptions are collected, not raised.
        pending_stops = [member.stop() for member in self.registry.all_actors()]
        await asyncio.gather(*pending_stops, return_exceptions=True)
        if self._mqtt_client:
            await self._mqtt_client.disconnect()
        logger.info("[ActorSystem] All actors stopped.")

    async def run_forever(self):
        """Idle in one-second steps while running; on interrupt or cancellation, stop everything."""
        try:
            while self._running:
                await asyncio.sleep(1)
        except (KeyboardInterrupt, asyncio.CancelledError):
            logger.info("[ActorSystem] Shutdown signal received.")
            await self.stop_all()
Top-level orchestrator.
@property
def supervisor(self) -> Supervisor:
    """The system's Supervisor, built on first access from the registry and ``_inject``."""
    existing = self._supervisor
    if existing is not None:
        return existing
    self._supervisor = Supervisor(self.registry, self._inject)
    return self._supervisor
Lazy-create the Supervisor bound to this system's registry and inject function.
async def start(self, *initial_actors: Actor):
    """Connect the MQTT publisher, then inject, register and start each given actor in order."""
    self._running = True
    broker, port = self._mqtt_broker, self._mqtt_port
    self._mqtt_client = await _MQTTPublisher.create(broker, port)

    for member in initial_actors:
        self._inject(member)
        await self.registry.register(member)
        await member.start()

    logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")
async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
    """Create an actor from ``actor_class(**kwargs)``, register it and start it."""
    new_actor = actor_class(**kwargs)
    # Injection happens before registration and start.
    self._inject(new_actor)
    registry = self.registry
    await registry.register(new_actor)
    await new_actor.start()
    return new_actor
Spawn and register a new actor in the system.
async def stop_all(self):
    """Stop the supervisor (if any), every registered actor, then the MQTT client."""
    self._running = False

    # Stop supervisor first so it doesn't try to restart actors we're about to stop
    supervisor = self._supervisor
    if supervisor:
        await supervisor.stop()

    # Actors are stopped concurrently; exceptions are collected, not raised.
    pending_stops = [member.stop() for member in self.registry.all_actors()]
    await asyncio.gather(*pending_stops, return_exceptions=True)

    publisher = self._mqtt_client
    if publisher:
        await publisher.disconnect()
    logger.info("[ActorSystem] All actors stopped.")
class ActorRegistry:
    """Keeps an id → actor map and routes messages between the actors in it."""

    def __init__(self):
        self._lock = asyncio.Lock()          # guards register/unregister
        self._actors: dict[str, Actor] = {}

    async def register(self, actor: Actor):
        """Attach this registry to ``actor`` and index it by its id."""
        async with self._lock:
            actor._registry = self
            self._actors[actor.actor_id] = actor
            logger.info(f"[Registry] Registered {actor.name} ({actor.actor_id[:8]})")

    async def unregister(self, actor_id: str):
        """Remove the actor with ``actor_id``; unknown ids are ignored."""
        async with self._lock:
            if actor_id not in self._actors:
                return
            del self._actors[actor_id]
            logger.info(f"[Registry] Unregistered {actor_id[:8]}")

    async def deliver(self, target_id: str, msg: Message) -> bool:
        """Hand ``msg`` to the target's ``receive``; returns False for an unknown target."""
        recipient = self._actors.get(target_id)
        if recipient is not None:
            await recipient.receive(msg)
            return True
        logger.warning(f"[Registry] Unknown target: {target_id[:8]}")
        return False

    async def broadcast(self, sender_id: str, msg_type: MessageType, payload=None):
        """Send one shared message to every registered actor except the sender."""
        msg = Message(type=msg_type, sender_id=sender_id, payload=payload)
        # Iterate over a snapshot so the map may change while we await.
        snapshot = list(self._actors.items())
        for actor_id, recipient in snapshot:
            if actor_id == sender_id:
                continue
            await recipient.receive(msg)

    def get(self, actor_id: str) -> Optional[Actor]:
        """Return the actor registered under ``actor_id``, or None."""
        return self._actors.get(actor_id)

    def all_actors(self) -> list[Actor]:
        """Return a new list of all registered actors."""
        return [*self._actors.values()]

    def find_by_name(self, name: str) -> Optional[Actor]:
        """Return the first registered actor whose ``name`` matches, or None."""
        matches = (candidate for candidate in self._actors.values() if candidate.name == name)
        return next(matches, None)

    def __len__(self):
        return len(self._actors)
Maintains a map of all living actors and routes messages between them.
class LLMAgent(Actor):
    """
    An Actor that uses an LLM to process tasks.

    Maintains a conversation history that is persisted across restarts,
    compresses older turns into a rolling summary, and accumulates token
    usage and cost across every LLM call it makes.
    """

    def __init__(
        self,
        llm_provider: Optional[LLMProvider] = None,
        system_prompt: str = "You are a helpful AI agent.",
        max_history: int = 20,
        summarize_threshold: int = 30,
        **kwargs,
    ):
        """
        Args:
            llm_provider: Provider used for completions; ``None`` disables LLM calls.
            system_prompt: System prompt sent with every request.
            max_history: Number of most recent messages sent to the LLM.
            summarize_threshold: History length at which the older half of the
                history is compressed into the rolling summary.
            **kwargs: Forwarded unchanged to ``Actor.__init__``.
        """
        super().__init__(**kwargs)
        self.llm = llm_provider
        self.system_prompt = system_prompt
        self.max_history = max_history
        self.summarize_threshold = summarize_threshold  # compress once history reaches this length
        self._conversation_history: list[dict] = []
        self._history_summary: str = ""  # rolling summary of compressed messages
        self._current_task = "idle"
        # Cost / token tracking — must be set here so subclasses (MainActor etc.) inherit them
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.total_cost_usd = 0.0

    def _current_task_description(self) -> str:
        """Return the short label of the task currently being processed ("idle" when none)."""
        return self._current_task

    async def on_start(self):
        """Restore persisted history/summary and publish this agent's capability manifest."""
        # Restore conversation history and rolling summary from persistence
        saved = self.recall("conversation_history", [])
        clean = []
        for m in saved:
            if not isinstance(m, dict):
                continue
            role = m.get("role", "")
            content = m.get("content", "")
            if role not in ("user", "assistant"):
                continue
            if not isinstance(content, str):
                content = str(content)
            if content.strip():
                clean.append({"role": role, "content": content})
        self._conversation_history = clean[-self.max_history:]
        self._history_summary = self.recall("history_summary", "")

        # Publish capability manifest so main's topic registry knows this agent exists
        description = (
            getattr(self, "DESCRIPTION", None)
            or (self.__class__.__doc__ or "").strip().split("\n")[0]
            or self.name
        )
        capabilities = getattr(self, "CAPABILITIES", [])
        input_schema = getattr(self, "INPUT_SCHEMA", {})
        output_schema = getattr(self, "OUTPUT_SCHEMA", {})
        await self.publish_manifest(
            description=description,
            capabilities=capabilities,
            input_schema=input_schema,
            output_schema=output_schema,
        )

    async def on_stop(self):
        """Persist the conversation history and rolling summary."""
        self.persist("conversation_history", self._conversation_history)
        self.persist("history_summary", self._history_summary)

    async def _maybe_summarize(self):
        """
        Compress the history once it reaches ``summarize_threshold`` messages.

        The older half of the history is summarized by the LLM (together with
        any previous summary) and replaced by that summary; the newer half is
        kept verbatim. Without an LLM, or if summarization fails, the history
        is simply truncated to the most recent ``max_history`` messages.
        """
        if len(self._conversation_history) < self.summarize_threshold:
            return
        if self.llm is None:
            # No LLM — just truncate
            self._conversation_history = self._conversation_history[-self.max_history:]
            return

        # Split: compress the older half, keep the recent half
        split = len(self._conversation_history) // 2
        to_compress = self._conversation_history[:split]
        to_keep = self._conversation_history[split:]

        # Build compression prompt. Content is coerced to str because a provider
        # may have returned None, which would otherwise raise here, outside the try.
        prior_summary = f"Previous summary:\n{self._history_summary}\n\n" if self._history_summary else ""
        messages_text = "\n".join(
            f"{str(m.get('role', '')).upper()}: {str(m.get('content') or '')[:400]}"
            for m in to_compress
            if isinstance(m, dict)
        )
        # NOTE(review): the prompt asks the model to preserve credentials, and the
        # resulting summary is persisted — confirm this is intended.
        prompt = (
            f"{prior_summary}"
            f"Summarize the following conversation segment concisely. "
            f"Preserve: key facts, decisions, user preferences, entity names, URLs, credentials, "
            f"any technical details mentioned. Be specific, not vague.\n\n"
            f"{messages_text}"
        )
        try:
            summary, usage = await self.llm.complete(
                messages=[{"role": "user", "content": prompt}],
                system="You are a conversation summarizer. Output a dense, factual summary. No preamble.",
                max_tokens=400,
            )
            self._record_usage(usage)
            self._history_summary = summary.strip()
            self._conversation_history = to_keep
            self.persist("history_summary", self._history_summary)
            self.persist("conversation_history", self._conversation_history)
            logger.info(f"[{self.name}] History summarized: {len(to_compress)} messages → summary ({len(summary)} chars), keeping {len(to_keep)}")
        except Exception as e:
            logger.warning(f"[{self.name}] Summarization failed: {e} — truncating instead")
            self._conversation_history = self._conversation_history[-self.max_history:]

    def _build_messages_with_summary(self, n: int) -> list[dict]:
        """
        Build the message list to send to the LLM: the last ``n`` history
        entries, preceded by the rolling summary (as a user/assistant exchange)
        when one exists.
        """
        recent = self._conversation_history[-n:]
        # A window cut mid-exchange opens on an assistant turn; trim it so the
        # window starts with a user turn and roles keep alternating.
        for idx, entry in enumerate(recent):
            if isinstance(entry, dict) and entry.get("role") == "user":
                recent = recent[idx:]
                break
        if not self._history_summary:
            return recent
        # Inject summary as a user/assistant exchange so it fits the messages format
        summary_ctx = [{
            "role": "user",
            "content": f"[Context from earlier in our conversation]\n{self._history_summary}"
        }, {
            "role": "assistant",
            "content": "Understood, I have that context."
        }]
        return summary_ctx + recent

    def _llm_messages(self) -> list[dict]:
        """Return the outgoing window reduced to well-formed user/assistant messages with string content."""
        return [
            {"role": m["role"], "content": str(m["content"])}
            for m in self._build_messages_with_summary(self.max_history)
            if isinstance(m, dict)
            and m.get("role") in ("user", "assistant")
            and m.get("content") is not None
        ]

    def _record_usage(self, usage: Optional[dict]) -> None:
        """Add one call's token counts and cost to the running totals."""
        usage = usage or {}
        self.total_input_tokens += usage.get("input_tokens", 0) or 0
        self.total_output_tokens += usage.get("output_tokens", 0) or 0
        self.total_cost_usd += usage.get("cost_usd", 0.0) or 0.0

    def _discard_pending(self, entry: dict) -> None:
        """Remove ``entry`` if it is still the newest history item (an unanswered user turn)."""
        history = self._conversation_history
        if history and history[-1] is entry:
            history.pop()

    async def handle_message(self, msg: Message):
        """Route TASK messages to ``_handle_task``; other message types are ignored here."""
        if msg.type == MessageType.TASK:
            await self._handle_task(msg)

    async def _handle_task(self, msg: Message):
        """
        Run one TASK message through the LLM, publish a completion event and
        reply to the sender with a RESULT message.

        Failures are logged and counted in ``metrics.tasks_failed``; they are
        not re-raised.
        """
        payload = msg.payload
        if isinstance(payload, dict):
            # Accept "text", "task", "message" or "query"; otherwise use str() of the payload
            raw = (
                payload.get("text")
                or payload.get("task")
                or payload.get("message")
                or payload.get("query")
                or str(payload)
            )
            meta = payload
        else:
            raw = payload if payload is not None else ""
            meta = {}
        # A non-string field (e.g. {"text": 123}) must not break the slicing below.
        task_text = raw if isinstance(raw, str) else str(raw)

        if self.llm is None:
            # Checked before touching _current_task so the label cannot get stuck.
            logger.warning(f"[{self.name}] No LLM provider configured.")
            return

        self._current_task = task_text[:60]
        start = time.time()
        user_entry = {"role": "user", "content": task_text}
        try:
            self._conversation_history.append(user_entry)

            response, usage = await self.llm.complete(
                messages=self._llm_messages(),
                system=self.system_prompt,
            )
            # Track usage the same way chat() does.
            self._record_usage(usage)

            self._conversation_history.append({"role": "assistant", "content": response})
            self.metrics.tasks_completed += 1
            duration = time.time() - start

            # Keep the history bounded on the task path too, then persist.
            await self._maybe_summarize()
            self.persist("conversation_history", self._conversation_history)

            # Publish completion
            await self._mqtt_publish(
                f"agents/{self.actor_id}/completed",
                {
                    "result_preview": str(response or "")[:200],
                    "duration": duration,
                    "task": task_text[:60],
                },
            )

            # Reply to sender — echo _task_id so send_to() futures resolve
            task_id = meta.get("_task_id")
            reply_to = meta.get("_reply_to") or msg.reply_to or msg.sender_id
            if reply_to:
                result = {"text": response, "task": task_text, "duration": duration}
                if task_id:
                    result["_task_id"] = task_id
                await self.send(reply_to, MessageType.RESULT, result)

        except Exception as e:
            # Drop the unanswered user turn so a retry does not duplicate it.
            self._discard_pending(user_entry)
            self.metrics.tasks_failed += 1
            # NOTE(review): `state_value` is not defined in the visible code — confirm it is read somewhere.
            self.state_value = "failed_task"
            logger.error(f"[{self.name}] LLM task failed: {e}", exc_info=True)

        finally:
            self._current_task = "idle"

    async def chat(self, user_message: str) -> str:
        """
        Direct async call - useful for the main conversation actor.

        Returns the assistant's reply, or "[No LLM configured]" when there is
        no provider. Exceptions from the provider propagate to the caller; the
        unanswered user turn is removed from history first.
        """
        if self.llm is None:
            return "[No LLM configured]"

        self.metrics.messages_processed += 1
        user_entry = {"role": "user", "content": user_message}
        self._conversation_history.append(user_entry)

        try:
            response, usage = await self.llm.complete(
                messages=self._llm_messages(),
                system=self.system_prompt,
            )
        except Exception:
            self._discard_pending(user_entry)
            raise
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        # Accumulate token usage and cost
        self._record_usage(usage)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )
        return response

    async def chat_stream(self, user_message: str):
        """
        Streaming version of chat(). Yields text chunks, then a final usage dict.
        The caller is responsible for printing chunks as they arrive.

        When the provider has no ``stream`` method (or there is no provider),
        the whole chat() response is yielded as a single chunk and no usage
        dict follows.

        Usage:
            async for chunk in agent.chat_stream("hello"):
                if isinstance(chunk, dict):
                    usage = chunk   # final usage summary
                else:
                    print(chunk, end="", flush=True)
        """
        if self.llm is None or not hasattr(self.llm, "stream"):
            # Fallback: non-streaming — yield whole response as single chunk
            response = await self.chat(user_message)
            yield response
            return

        self.metrics.messages_processed += 1
        user_entry = {"role": "user", "content": user_message}
        self._conversation_history.append(user_entry)

        full_text = []
        usage = {}
        finished = False
        try:
            async for chunk in self.llm.stream(
                messages=self._llm_messages(),
                system=self.system_prompt,
            ):
                if isinstance(chunk, dict):
                    usage = chunk
                else:
                    full_text.append(chunk)
                    yield chunk
            finished = True
        finally:
            # Stream failed or the consumer stopped early: drop the unanswered user turn.
            if not finished:
                self._discard_pending(user_entry)

        response = "".join(full_text)
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        self._record_usage(usage)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )

        # Yield final usage dict so caller can log it
        yield usage

    def _build_metrics(self) -> dict:
        """Extend the base actor metrics with cumulative token counts and cost."""
        m = super()._build_metrics()
        m["input_tokens"] = self.total_input_tokens
        m["output_tokens"] = self.total_output_tokens
        m["cost_usd"] = round(self.total_cost_usd, 6)
        return m

    def clear_history(self):
        """Reset the in-memory conversation history (the rolling summary and persisted copy are not touched)."""
        self._conversation_history = []
An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.
def __init__(
    self,
    llm_provider: Optional[LLMProvider] = None,
    system_prompt: str = "You are a helpful AI agent.",
    max_history: int = 20,
    summarize_threshold: int = 30,
    **kwargs,
):
    """
    Set up the LLM-facing state of the agent.

    Args:
        llm_provider: Provider used for completions, or ``None``.
        system_prompt: System prompt stored for later requests.
        max_history: Stored history-window size.
        summarize_threshold: Stored history length that triggers compression.
        **kwargs: Passed through to the parent initializer.
    """
    super().__init__(**kwargs)

    # Provider and prompt configuration
    self.llm = llm_provider
    self.system_prompt = system_prompt

    # History limits
    self.max_history = max_history
    self.summarize_threshold = summarize_threshold

    # Conversation state: raw turns plus the rolling summary of compressed ones
    self._conversation_history: list[dict] = []
    self._history_summary: str = ""
    self._current_task = "idle"

    # Token / cost counters live on the base class so subclasses inherit them
    self.total_input_tokens = self.total_output_tokens = 0
    self.total_cost_usd = 0.0
async def on_start(self):
    """
    Reload persisted conversation state, then announce this agent.

    Only dict entries with a "user"/"assistant" role and non-blank content
    are restored (non-string content is stringified); the result is cut to
    the last ``max_history`` entries.
    """
    restored = []
    for entry in self.recall("conversation_history", []):
        if not isinstance(entry, dict):
            continue
        speaker = entry.get("role", "")
        text = entry.get("content", "")
        if speaker in ("user", "assistant"):
            text = text if isinstance(text, str) else str(text)
            if text.strip():
                restored.append({"role": speaker, "content": text})
    self._conversation_history = restored[-self.max_history:]
    self._history_summary = self.recall("history_summary", "")

    # Manifest description: explicit DESCRIPTION, else first docstring line, else the actor name
    doc_first_line = (self.__class__.__doc__ or "").strip().split("\n")[0]
    description = getattr(self, "DESCRIPTION", None) or doc_first_line or self.name

    # Publish capability manifest so main's topic registry knows this agent exists
    await self.publish_manifest(
        description=description,
        capabilities=getattr(self, "CAPABILITIES", []),
        input_schema=getattr(self, "INPUT_SCHEMA", {}),
        output_schema=getattr(self, "OUTPUT_SCHEMA", {}),
    )
Called when actor starts. Override for init logic.
async def on_stop(self):
    """Write the conversation history and the rolling summary to persistence."""
    snapshot = (
        ("conversation_history", self._conversation_history),
        ("history_summary", self._history_summary),
    )
    for key, value in snapshot:
        self.persist(key, value)
Called when actor stops. Override for cleanup.
async def handle_message(self, msg: Message):
    """Forward TASK messages to the task handler; every other message type is ignored."""
    if msg.type != MessageType.TASK:
        return
    await self._handle_task(msg)
Handle messages not caught by default handlers.
async def chat(self, user_message: str) -> str:
    """
    Direct async call - useful for the main conversation actor.

    Appends the user message to the history, sends the recent window (with
    the rolling summary, if any) to the LLM, records the reply, and updates
    usage totals and published metrics.

    Returns:
        The assistant's reply, or "[No LLM configured]" when there is no provider.

    Raises:
        Whatever the provider raises. The unanswered user turn is removed
        from the history first, so a retry does not send it twice.
    """
    if self.llm is None:
        return "[No LLM configured]"

    self.metrics.messages_processed += 1
    user_entry = {"role": "user", "content": user_message}
    self._conversation_history.append(user_entry)

    # Only well-formed user/assistant messages with string content are sent
    safe_history = [
        {"role": m["role"], "content": str(m["content"])}
        for m in self._build_messages_with_summary(self.max_history)
        if isinstance(m, dict)
        and m.get("role") in ("user", "assistant")
        and m.get("content") is not None
    ]
    try:
        response, usage = await self.llm.complete(
            messages=safe_history,
            system=self.system_prompt,
        )
    except Exception:
        # Roll back only our own entry, and only if it is still the newest one.
        history = self._conversation_history
        if history and history[-1] is user_entry:
            history.pop()
        raise
    self._conversation_history.append({"role": "assistant", "content": response})
    await self._maybe_summarize()
    self.persist("conversation_history", self._conversation_history)

    # Accumulate token usage and cost
    self.total_input_tokens += usage.get("input_tokens", 0)
    self.total_output_tokens += usage.get("output_tokens", 0)
    self.total_cost_usd += usage.get("cost_usd", 0.0)

    await self._mqtt_publish(
        f"agents/{self.actor_id}/metrics",
        self._build_metrics(),
    )
    return response
Direct async call - useful for the main conversation actor.
async def chat_stream(self, user_message: str):
    """
    Streaming version of chat(). Yields text chunks, then a final usage dict.
    The caller is responsible for printing chunks as they arrive.

    When there is no provider, or the provider has no ``stream`` method, the
    whole chat() response is yielded as a single chunk and no usage dict
    follows. If the stream fails or the consumer stops iterating early, the
    unanswered user turn is removed from the history.

    Usage:
        async for chunk in agent.chat_stream("hello"):
            if isinstance(chunk, dict):
                usage = chunk   # final usage summary
            else:
                print(chunk, end="", flush=True)
    """
    if self.llm is None or not hasattr(self.llm, "stream"):
        # Fallback: non-streaming — yield whole response as single chunk
        response = await self.chat(user_message)
        yield response
        return

    self.metrics.messages_processed += 1
    user_entry = {"role": "user", "content": user_message}
    self._conversation_history.append(user_entry)

    full_text = []
    usage = {}

    # Only well-formed user/assistant messages with string content are sent
    safe_history = [
        {"role": m["role"], "content": str(m["content"])}
        for m in self._build_messages_with_summary(self.max_history)
        if isinstance(m, dict)
        and m.get("role") in ("user", "assistant")
        and m.get("content") is not None
    ]
    finished = False
    try:
        async for chunk in self.llm.stream(
            messages=safe_history,
            system=self.system_prompt,
        ):
            if isinstance(chunk, dict):
                usage = chunk
            else:
                full_text.append(chunk)
                yield chunk
        finished = True
    finally:
        # `finally` (not `except`) so an abandoned generator is cleaned up too.
        if not finished:
            history = self._conversation_history
            if history and history[-1] is user_entry:
                history.pop()

    response = "".join(full_text)
    self._conversation_history.append({"role": "assistant", "content": response})
    await self._maybe_summarize()
    self.persist("conversation_history", self._conversation_history)

    self.total_input_tokens += usage.get("input_tokens", 0)
    self.total_output_tokens += usage.get("output_tokens", 0)
    self.total_cost_usd += usage.get("cost_usd", 0.0)

    await self._mqtt_publish(
        f"agents/{self.actor_id}/metrics",
        self._build_metrics(),
    )

    # Yield final usage dict so caller can log it
    yield usage
Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.
Usage: async for chunk in agent.chat_stream("hello"): if isinstance(chunk, dict): usage = chunk # final usage summary else: print(chunk, end="", flush=True)
class AnthropicProvider(LLMProvider):
    """LLM provider that talks to the Anthropic Messages API through the async client."""

    def __init__(self, model: str = "claude-sonnet-4-6", api_key: Optional[str] = None):
        """Create the async client; ``api_key`` is passed straight to ``anthropic.AsyncAnthropic``."""
        import anthropic
        self.client = anthropic.AsyncAnthropic(api_key=api_key)
        self.model = model

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """
        Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

        Only ``max_tokens`` (default 4096) is read from ``kwargs``.
        """
        response = await self.client.messages.create(
            model=self.model,
            max_tokens=kwargs.get("max_tokens", 4096),
            system=system,
            messages=messages,
        )
        # Concatenate every text block: indexing content[0] raises when the
        # content list is empty or its first block carries no text.
        text = "".join(
            block.text
            for block in response.content
            if isinstance(getattr(block, "text", None), str)
        )
        usage = {
            "input_tokens": response.usage.input_tokens,
            "output_tokens": response.usage.output_tokens,
            "cost_usd": _calc_cost(self.model,
                                   response.usage.input_tokens,
                                   response.usage.output_tokens),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage."""
        input_tokens = output_tokens = 0
        async with self.client.messages.stream(
            model=self.model,
            max_tokens=kwargs.get("max_tokens", 4096),
            system=system,
            messages=messages,
        ) as s:
            async for chunk in s.text_stream:
                yield chunk
            # Final message has usage counts
            final = await s.get_final_message()
            input_tokens = final.usage.input_tokens
            output_tokens = final.usage.output_tokens
        yield {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_usd": _calc_cost(self.model, input_tokens, output_tokens),
        }
Base class for LLM providers.
async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
    """
    Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

    Only ``max_tokens`` (default 4096) is read from ``kwargs``.
    """
    response = await self.client.messages.create(
        model=self.model,
        max_tokens=kwargs.get("max_tokens", 4096),
        system=system,
        messages=messages,
    )
    # Concatenate every text block: indexing content[0] raises when the
    # content list is empty or its first block carries no text.
    text = "".join(
        block.text
        for block in response.content
        if isinstance(getattr(block, "text", None), str)
    )
    usage = {
        "input_tokens": response.usage.input_tokens,
        "output_tokens": response.usage.output_tokens,
        "cost_usd": _calc_cost(self.model,
                               response.usage.input_tokens,
                               response.usage.output_tokens),
    }
    return text, usage
Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}
async def stream(self, messages: list[dict], system: str = "", **kwargs):
    """
    Stream a completion: text pieces are yielded as they arrive, followed by
    one dict holding input_tokens, output_tokens and cost_usd.
    """
    prompt_tokens = completion_tokens = 0
    manager = self.client.messages.stream(
        model=self.model,
        max_tokens=kwargs.get("max_tokens", 4096),
        system=system,
        messages=messages,
    )
    async with manager as live:
        async for piece in live.text_stream:
            yield piece
        # Usage counts are read from the final message once the text is exhausted
        final_usage = (await live.get_final_message()).usage
        prompt_tokens = final_usage.input_tokens
        completion_tokens = final_usage.output_tokens
    yield {
        "input_tokens": prompt_tokens,
        "output_tokens": completion_tokens,
        "cost_usd": _calc_cost(self.model, prompt_tokens, completion_tokens),
    }
Yield text chunks as they arrive. Final item is a dict with usage.
class OpenAIProvider(LLMProvider):
    """LLM provider that talks to the OpenAI Chat Completions API through the async client."""

    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
        """Create the async client; ``api_key`` is passed straight to ``openai.AsyncOpenAI``."""
        import openai
        self.client = openai.AsyncOpenAI(api_key=api_key)
        self.model = model

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """
        Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

        The system prompt, when non-empty, is sent as a leading "system"
        message. Only ``max_tokens`` (default 4096) is read from ``kwargs``.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_completion_tokens=kwargs.get("max_tokens", 4096),
        )
        # message.content can be None; callers expect a string.
        text = response.choices[0].message.content or ""
        # Guard a missing usage object, as NIMProvider.complete does.
        input_tok = response.usage.prompt_tokens if response.usage else 0
        output_tok = response.usage.completion_tokens if response.usage else 0
        usage = {
            "input_tokens": input_tok,
            "output_tokens": output_tok,
            "cost_usd": _calc_cost(self.model, input_tok, output_tok),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage."""
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        input_tokens = output_tokens = 0
        async with await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_completion_tokens=kwargs.get("max_tokens", 4096),
            stream=True,
            stream_options={"include_usage": True},
        ) as s:
            async for chunk in s:
                # The usage-only chunk has an empty choices list
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    yield delta
                if chunk.usage:
                    input_tokens = chunk.usage.prompt_tokens
                    output_tokens = chunk.usage.completion_tokens
        yield {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_usd": _calc_cost(self.model, input_tokens, output_tokens),
        }
Base class for LLM providers.
async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
    """
    Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

    The system prompt, when non-empty, is sent as a leading "system" message.
    Only ``max_tokens`` (default 4096) is read from ``kwargs``.
    """
    full_messages = ([{"role": "system", "content": system}] if system else []) + messages
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=full_messages,
        max_completion_tokens=kwargs.get("max_tokens", 4096),
    )
    # message.content can be None; callers expect a string.
    text = response.choices[0].message.content or ""
    # Guard a missing usage object instead of raising AttributeError.
    input_tok = response.usage.prompt_tokens if response.usage else 0
    output_tok = response.usage.completion_tokens if response.usage else 0
    usage = {
        "input_tokens": input_tok,
        "output_tokens": output_tok,
        "cost_usd": _calc_cost(self.model, input_tok, output_tok),
    }
    return text, usage
Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}
async def stream(self, messages: list[dict], system: str = "", **kwargs):
    """
    Stream a completion: text deltas are yielded as they arrive, followed by
    one dict holding input_tokens, output_tokens and cost_usd.
    """
    system_prefix = [{"role": "system", "content": system}] if system else []
    full_messages = system_prefix + messages
    prompt_tokens = completion_tokens = 0
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=full_messages,
        max_completion_tokens=kwargs.get("max_tokens", 4096),
        stream=True,
        stream_options={"include_usage": True},
    )
    async with response as events:
        async for event in events:
            # Events without choices carry no text
            piece = event.choices[0].delta.content if event.choices else None
            if piece:
                yield piece
            if event.usage:
                prompt_tokens = event.usage.prompt_tokens
                completion_tokens = event.usage.completion_tokens
    yield {
        "input_tokens": prompt_tokens,
        "output_tokens": completion_tokens,
        "cost_usd": _calc_cost(self.model, prompt_tokens, completion_tokens),
    }
Yield text chunks as they arrive. Final item is a dict with usage.
class OllamaProvider(LLMProvider):
    """Local LLM via Ollama's HTTP ``/api/chat`` endpoint."""

    def __init__(self, model: str = "llama3", base_url: str = "http://localhost:11434"):
        """Store the model name and the base URL of the Ollama server."""
        self.model = model
        self.base_url = base_url

    def _chat_payload(self, messages: list[dict], system: str, stream: bool, kwargs: dict) -> dict:
        """Build the ``/api/chat`` request body."""
        # /api/chat takes the system prompt as a "system"-role message; it has
        # no top-level "system" field, so sending one drops the prompt.
        chat_messages = ([{"role": "system", "content": system}] if system else []) + list(messages)
        payload = {"model": self.model, "messages": chat_messages, "stream": stream}
        # Honour an explicit max_tokens via Ollama's num_predict option.
        if kwargs.get("max_tokens") is not None:
            payload["options"] = {"num_predict": kwargs["max_tokens"]}
        return payload

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """
        Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

        ``cost_usd`` is always 0.0 for a local model.

        Raises:
            aiohttp.ClientResponseError: when the server answers with an error status.
        """
        import aiohttp
        payload = self._chat_payload(messages, system, False, kwargs)
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                # Surface HTTP errors instead of a KeyError on the error body.
                resp.raise_for_status()
                data = await resp.json()
        text = data["message"]["content"]
        prompt_eval = data.get("prompt_eval_count", 0)
        eval_count = data.get("eval_count", 0)
        usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """
        Yield text chunks as they arrive. Final item is a dict with usage.

        Raises:
            aiohttp.ClientResponseError: when the server answers with an error status.
        """
        import json as _json

        import aiohttp
        payload = self._chat_payload(messages, system, True, kwargs)
        input_tokens = output_tokens = 0
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                resp.raise_for_status()
                # The response body is read line by line, one JSON object per line
                async for raw in resp.content:
                    if not raw.strip():
                        continue
                    try:
                        data = _json.loads(raw)
                    except ValueError:
                        # Skip lines that are not valid JSON
                        continue
                    if not isinstance(data, dict):
                        continue
                    delta = (data.get("message") or {}).get("content", "")
                    if delta:
                        yield delta
                    if data.get("done"):
                        input_tokens = data.get("prompt_eval_count", 0)
                        output_tokens = data.get("eval_count", 0)
        yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}
Local LLM via Ollama.
async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
    """
    Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

    ``cost_usd`` is always 0.0 for a local model.

    Raises:
        aiohttp.ClientResponseError: when the server answers with an error status.
    """
    import aiohttp
    # /api/chat takes the system prompt as a "system"-role message; it has no
    # top-level "system" field, so sending one drops the prompt.
    chat_messages = ([{"role": "system", "content": system}] if system else []) + list(messages)
    payload = {"model": self.model, "messages": chat_messages, "stream": False}
    # Honour an explicit max_tokens via Ollama's num_predict option.
    if kwargs.get("max_tokens") is not None:
        payload["options"] = {"num_predict": kwargs["max_tokens"]}
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
            # Surface HTTP errors instead of a KeyError on the error body.
            resp.raise_for_status()
            data = await resp.json()
    text = data["message"]["content"]
    prompt_eval = data.get("prompt_eval_count", 0)
    eval_count = data.get("eval_count", 0)
    usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
    return text, usage
Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}
async def stream(self, messages: list[dict], system: str = "", **kwargs):
    """
    Yield text chunks as they arrive. Final item is a dict with usage.

    Raises:
        aiohttp.ClientResponseError: when the server answers with an error status.
    """
    import json as _json

    import aiohttp
    # /api/chat takes the system prompt as a "system"-role message; it has no
    # top-level "system" field, so sending one drops the prompt.
    chat_messages = ([{"role": "system", "content": system}] if system else []) + list(messages)
    payload = {"model": self.model, "messages": chat_messages, "stream": True}
    # Honour an explicit max_tokens via Ollama's num_predict option.
    if kwargs.get("max_tokens") is not None:
        payload["options"] = {"num_predict": kwargs["max_tokens"]}
    input_tokens = output_tokens = 0
    async with aiohttp.ClientSession() as session:
        async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
            resp.raise_for_status()
            # The response body is read line by line, one JSON object per line
            async for raw in resp.content:
                if not raw.strip():
                    continue
                try:
                    data = _json.loads(raw)
                except ValueError:
                    # Skip lines that are not valid JSON
                    continue
                if not isinstance(data, dict):
                    continue
                delta = (data.get("message") or {}).get("content", "")
                if delta:
                    yield delta
                if data.get("done"):
                    input_tokens = data.get("prompt_eval_count", 0)
                    output_tokens = data.get("eval_count", 0)
    yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}
Yield text chunks as they arrive. Final item is a dict with usage.
class NIMProvider(LLMProvider):
    """
    NVIDIA NIM — OpenAI-compatible API hosted at integrate.api.nvidia.com.
    Free tier: 1000 requests/month per model. No local GPU required.

    Popular free models:
      meta/llama-3.1-8b-instruct          — fast, lightweight
      meta/llama-3.3-70b-instruct         — strong general purpose
      mistralai/mistral-7b-instruct-v0.3  — fast & capable
      mistralai/mixtral-8x7b-instruct-v0.1
      google/gemma-3-27b-it
      microsoft/phi-3-mini-128k-instruct
      deepseek-ai/deepseek-r1             — reasoning model
      deepseek-ai/deepseek-r1-distill-qwen-7b
      nvidia/llama-3.1-nemotron-70b-instruct
      nvidia/llama-3.3-nemotron-super-49b-v1

    Get a free API key at: https://build.nvidia.com
    """

    NIM_BASE_URL = "https://integrate.api.nvidia.com/v1"

    def __init__(
        self,
        model: str = "meta/llama-3.3-70b-instruct",
        api_key: Optional[str] = None,
        base_url: str = NIM_BASE_URL,
    ):
        """Create an OpenAI-compatible async client pointed at ``base_url``."""
        import openai
        self.model = model
        self.client = openai.AsyncOpenAI(
            api_key=api_key or "dummy",  # NIM free tier may not require a key locally
            base_url=base_url,
        )

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """
        Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

        The system prompt, when non-empty, is sent as a leading "system"
        message. Only ``max_tokens`` (default 4096) is read from ``kwargs``.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_tokens=kwargs.get("max_tokens", 4096),
        )
        # message.content can be None; callers expect a string.
        text = response.choices[0].message.content or ""
        input_tok = response.usage.prompt_tokens if response.usage else 0
        output_tok = response.usage.completion_tokens if response.usage else 0
        usage = {
            "input_tokens": input_tok,
            "output_tokens": output_tok,
            "cost_usd": _calc_cost(self.model, input_tok, output_tok),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage."""
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        # Stay at 0 unless a streamed chunk reports usage
        input_tokens = output_tokens = 0
        async with await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_tokens=kwargs.get("max_tokens", 4096),
            stream=True,
        ) as s:
            async for chunk in s:
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    yield delta
                if chunk.usage:
                    input_tokens = chunk.usage.prompt_tokens
                    output_tokens = chunk.usage.completion_tokens
        yield {
            "input_tokens": input_tokens,
            "output_tokens": output_tokens,
            "cost_usd": _calc_cost(self.model, input_tokens, output_tokens),
        }
NVIDIA NIM — OpenAI-compatible API hosted at integrate.api.nvidia.com. Free tier: 1000 requests/month per model. No local GPU required.
Popular free models: meta/llama-3.1-8b-instruct — fast, lightweight meta/llama-3.3-70b-instruct — strong general purpose mistralai/mistral-7b-instruct-v0.3 — fast & capable mistralai/mixtral-8x7b-instruct-v0.1 google/gemma-3-27b-it microsoft/phi-3-mini-128k-instruct deepseek-ai/deepseek-r1 — reasoning model deepseek-ai/deepseek-r1-distill-qwen-7b nvidia/llama-3.1-nemotron-70b-instruct nvidia/llama-3.3-nemotron-super-49b-v1
Get a free API key at: https://build.nvidia.com
def __init__(
    self,
    model: str = "meta/llama-3.3-70b-instruct",
    api_key: Optional[str] = None,
    base_url: str = NIM_BASE_URL,
):
    """
    Build an OpenAI-compatible async client for the NIM endpoint.

    Args:
        model: Model identifier sent with each request.
        api_key: API key; the placeholder "dummy" is used when none is given.
        base_url: Base URL of the OpenAI-compatible endpoint.
    """
    import openai

    # NIM free tier may not require a key locally
    effective_key = api_key or "dummy"
    self.model = model
    self.client = openai.AsyncOpenAI(api_key=effective_key, base_url=base_url)
async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
    """
    Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.

    The system prompt, when non-empty, is sent as a leading "system" message.
    Only ``max_tokens`` (default 4096) is read from ``kwargs``.
    """
    full_messages = ([{"role": "system", "content": system}] if system else []) + messages
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=full_messages,
        max_tokens=kwargs.get("max_tokens", 4096),
    )
    # message.content can be None; callers expect a string.
    text = response.choices[0].message.content or ""
    input_tok = response.usage.prompt_tokens if response.usage else 0
    output_tok = response.usage.completion_tokens if response.usage else 0
    usage = {
        "input_tokens": input_tok,
        "output_tokens": output_tok,
        "cost_usd": _calc_cost(self.model, input_tok, output_tok),
    }
    return text, usage
Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}
async def stream(self, messages: list[dict], system: str = "", **kwargs):
    """
    Stream a completion: text deltas are yielded as they arrive, followed by
    one dict holding input_tokens, output_tokens and cost_usd. Token counts
    stay at 0 unless a streamed chunk carries usage.
    """
    system_prefix = [{"role": "system", "content": system}] if system else []
    full_messages = system_prefix + messages
    prompt_tokens = completion_tokens = 0
    response = await self.client.chat.completions.create(
        model=self.model,
        messages=full_messages,
        max_tokens=kwargs.get("max_tokens", 4096),
        stream=True,
    )
    async with response as events:
        async for event in events:
            # Events without choices carry no text
            piece = event.choices[0].delta.content if event.choices else None
            if piece:
                yield piece
            if event.usage:
                prompt_tokens = event.usage.prompt_tokens
                completion_tokens = event.usage.completion_tokens
    yield {
        "input_tokens": prompt_tokens,
        "output_tokens": completion_tokens,
        "cost_usd": _calc_cost(self.model, prompt_tokens, completion_tokens),
    }
Yield text chunks as they arrive. Final item is a dict with usage.
644class MainActor(LLMAgent): 645 DESCRIPTION = "Main orchestrator: spawns agents, routes tasks, manages the multi-agent system" 646 CAPABILITIES = ["spawn_agent", "list_agents", "list_nodes", "list_topics", "orchestration"] 647 648 INTENT_CLASSIFIER_PROMPT = ( 649 "You are a routing classifier for a smart home AI assistant.\n" 650 "Respond with exactly one token: HA, PIPELINE, or OTHER.\n\n" 651 "HA = a direct, one-shot Home Assistant action or query:\n" 652 " - Turn on/off a device right now\n" 653 " - List devices, areas, entities, automations\n" 654 " - Create/edit/delete a HA automation\n" 655 " - Set temperature, dim lights, lock door — immediate action\n\n" 656 "PIPELINE = a reactive rule that should run continuously:\n" 657 " - 'if X happens then do Y' — any conditional/reactive logic\n" 658 " - 'when X send me a message/notification'\n" 659 " - 'whenever X turns on/off do Y'\n" 660 " - Any rule involving a sensor state change triggering an action or notification\n" 661 " - Any webcam/camera detection triggering anything\n" 662 " - Anything involving Discord/Telegram notifications triggered by an event\n\n" 663 "OTHER = general conversation, coding, questions, anything not HA or pipeline related." 664 ) 665 666 def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs): 667 kwargs.setdefault("name", "main") 668 kwargs.setdefault("system_prompt", ORCHESTRATOR_PROMPT) 669 super().__init__(llm_provider=llm_provider, **kwargs) 670 self._result_futures: dict[str, asyncio.Future] = {} 671 # Queued monitor notifications — prepended to next user response 672 self._pending_notifications: list[dict] = [] 673 self.protected = True 674 # Remote node tracking: node_name → {"last_seen": float, "agents": [...]} 675 self._known_nodes: dict[str, dict] = {} 676 # Topic registry: topic → [manifest, ...] 
— built from agents/+/manifest 677 self._topic_registry: dict[str, list] = {} # topic → list of agent manifests 678 self._agent_manifests: dict[str, dict] = {} # agent name → latest manifest (includes schemas) 679 680 # ── Lifecycle ────────────────────────────────────────────────────────── 681 682 async def on_start(self): 683 await super().on_start() 684 await self._restore_spawned_agents() 685 # Listen for remote node heartbeats so we know what's online 686 self._tasks.append(asyncio.create_task(self._node_heartbeat_listener())) 687 # Listen for agent capability manifests to build topic registry 688 self._tasks.append(asyncio.create_task(self._manifest_listener())) 689 # Inject persisted user facts into system prompt 690 self._inject_user_facts_into_prompt() 691 692 # ── Spawn registry ───────────────────────────────────────────────────── 693 694 def _get_spawn_registry(self) -> dict: 695 return self.recall(SPAWN_REGISTRY_KEY) or {} 696 697 def _save_to_spawn_registry(self, config: dict): 698 reg = self._get_spawn_registry() 699 reg[config["name"]] = config 700 self.persist(SPAWN_REGISTRY_KEY, reg) 701 logger.info(f"[{self.name}] Spawn registry: {list(reg.keys())}") 702 703 def _remove_from_spawn_registry(self, name: str): 704 reg = self._get_spawn_registry() 705 if name in reg: 706 del reg[name] 707 self.persist(SPAWN_REGISTRY_KEY, reg) 708 logger.info(f"[{self.name}] Removed '{name}' from spawn registry.") 709 710 # ── Pipeline rules registry ──────────────────────────────────────────── 711 # Stores grouped rules: one entry per user request, listing all agents spawned for it. 
712 # Schema: { rule_id: { "rule_id", "task", "agents": [str], "created_at": float } } 713 714 def get_pipeline_rules(self) -> dict: 715 return self.recall(PIPELINE_RULES_KEY) or {} 716 717 def save_pipeline_rule(self, rule: dict): 718 rules = self.get_pipeline_rules() 719 rules[rule["rule_id"]] = rule 720 self.persist(PIPELINE_RULES_KEY, rules) 721 logger.info(f"[{self.name}] Pipeline rule saved: {rule['rule_id']} agents={rule.get('agents', [])}") 722 723 def get_notification_urls(self) -> dict: 724 """Return persisted notification webhook URLs (discord, telegram, slack, etc.)""" 725 return self.recall("_notification_urls") or {} 726 727 # ── User facts ───────────────────────────────────────────────────────── 728 # Key facts extracted from conversation: HA URL, entity names, preferences, 729 # user name, webhook URLs, etc. Stored separately from history so they 730 # survive summarization and persist indefinitely. 731 732 _FACTS_EXTRACT_PROMPT = ( 733 "Extract durable facts from this conversation exchange that would be useful to remember " 734 "long-term. Focus on: names, locations, device entity IDs, URLs, credentials, preferences, " 735 "configurations, and any explicit statements about the user's setup.\n" 736 "Return a JSON object with short descriptive keys and concise values. " 737 "Return {} if nothing worth remembering was said.\n" 738 "Example: {\"ha_url\": \"http://192.168.1.10:8123\", \"user_name\": \"Alex\", " 739 "\"living_room_light\": \"light.wiz_rgbw_tunable_02cba0\"}\n" 740 "Output only valid JSON. No explanation, no markdown." 
741 ) 742 743 def get_user_facts(self) -> dict: 744 return self.recall("_user_facts") or {} 745 746 def _inject_user_facts_into_prompt(self): 747 """Prepend known user facts to the system prompt so the LLM always has them.""" 748 facts = self.get_user_facts() 749 if not facts: 750 return 751 facts_lines = "\n".join(f" {k}: {v}" for k, v in facts.items()) 752 facts_block = f"\n\n== KNOWN USER FACTS (always keep in mind) ==\n{facts_lines}" 753 # Avoid duplicating if already injected 754 marker = "== KNOWN USER FACTS" 755 base_prompt = ORCHESTRATOR_PROMPT 756 if marker in self.system_prompt: 757 # Replace existing facts block 758 self.system_prompt = base_prompt + facts_block 759 else: 760 self.system_prompt = self.system_prompt + facts_block 761 762 async def _extract_and_save_facts(self, user_message: str, assistant_response: str): 763 """After each exchange, ask the LLM to extract any new durable facts.""" 764 if self.llm is None: 765 return 766 exchange = f"USER: {user_message[:600]}\nASSISTANT: {assistant_response[:600]}" 767 try: 768 raw, _ = await self.llm.complete( 769 messages=[{"role": "user", "content": exchange}], 770 system=self._FACTS_EXTRACT_PROMPT, 771 max_tokens=200, 772 ) 773 import json as _json, re as _re 774 clean = raw.strip().lstrip("```json").lstrip("```").rstrip("```").strip() 775 new_facts = _json.loads(clean) 776 if not isinstance(new_facts, dict) or not new_facts: 777 return 778 # Merge with existing facts 779 facts = self.get_user_facts() 780 facts.update(new_facts) 781 self.persist("_user_facts", facts) 782 self._inject_user_facts_into_prompt() 783 logger.info(f"[{self.name}] User facts updated: {list(new_facts.keys())}") 784 except Exception as e: 785 logger.debug(f"[{self.name}] Facts extraction skipped: {e}") 786 787 async def delete_pipeline_rule(self, rule_id: str) -> str: 788 """Stop all agents for a rule and remove it from registry.""" 789 rules = self.get_pipeline_rules() 790 rule = rules.get(rule_id) 791 if not rule: 792 return 
f"No rule found with id '{rule_id}'." 793 agents = rule.get("agents", []) 794 stopped = [] 795 for agent_name in agents: 796 self._remove_from_spawn_registry(agent_name) 797 if self._registry: 798 actor = self._registry.find_by_name(agent_name) 799 if actor: 800 await actor.stop() 801 await self._registry.unregister(actor.actor_id) 802 stopped.append(agent_name) 803 del rules[rule_id] 804 self.persist(PIPELINE_RULES_KEY, rules) 805 task_preview = rule.get("task", "")[:60] 806 return f"Rule '{rule_id}' deleted. Stopped agents: {', '.join(stopped) or 'none running'}.\nRule was: {task_preview}" 807 808 async def _restore_spawned_agents(self): 809 reg = self._get_spawn_registry() 810 if not reg: 811 return 812 logger.info(f"[{self.name}] Restoring {len(reg)} agent(s): {list(reg.keys())}") 813 for name, config in reg.items(): 814 node = config.get("node", "").strip() 815 if node: 816 # Remote agent — re-publish spawn to its node; no local object expected 817 logger.info(f"[{self.name}] Re-spawning remote agent '{name}' on node '{node}'") 818 try: 819 await self._spawn_remote(config, node, save=False) 820 except Exception as e: 821 logger.error(f"[{self.name}] Failed to restore remote '{name}' on '{node}': {e}") 822 continue 823 if self._registry and self._registry.find_by_name(name): 824 logger.info(f"[{self.name}] '{name}' already running, skipping.") 825 continue 826 try: 827 await self._spawn_from_config(config, save=False) 828 logger.info(f"[{self.name}] Restored: {name}") 829 except Exception as e: 830 logger.error(f"[{self.name}] Failed to restore '{name}': {e}") 831 832 # ── Message handling ─────────────────────────────────────────────────── 833 834 async def handle_message(self, msg: Message): 835 if msg.type == MessageType.TASK: 836 # Intercept monitor notifications BEFORE passing to LLM _handle_task 837 if isinstance(msg.payload, dict) and msg.payload.get("_monitor_notification"): 838 self._pending_notifications.append(msg.payload) 839 
logger.info(f"[{self.name}] Monitor alert queued: {msg.payload.get('message','')[:80]}") 840 return 841 await self._handle_task(msg) 842 843 elif msg.type == MessageType.RESULT: 844 if isinstance(msg.payload, dict): 845 # Support both key names: "_task_id" (new) and "task" (legacy) 846 fid = msg.payload.get("_task_id") or msg.payload.get("task") 847 if fid and fid in self._result_futures: 848 fut = self._result_futures[fid] 849 if not fut.done(): 850 fut.set_result(msg.payload) 851 852 # ── Home Automation intent detection ─────────────────────────────────── 853 854 @staticmethod 855 def _looks_like_home_automation_request(text: str) -> bool: 856 lowered = (text or "").lower() 857 if "home assistant" in lowered: 858 return True 859 if lowered.startswith("spawn ") or lowered.startswith("/"): 860 return False 861 862 # Wactorz pipeline requests — these involve external sensors/agents, not HA natively 863 # Route to planner instead of HA agent 864 _pipeline_keywords = [ 865 "camera", "webcam", "yolo", "detect", "detection", "person detect", 866 "object detect", "laptop camera", "cv2", "opencv", 867 "when detected", "if detected", "whenever detected", 868 "notify me", "send me a message", "send me a discord", 869 "discord", "telegram", "whatsapp", 870 ] 871 if any(kw in lowered for kw in _pipeline_keywords): 872 return False 873 874 has_trigger = any(token in lowered for token in [ 875 "when ", "if ", "on ", "whenever ", "after ", "before ", 876 "as soon as ", "at ", 877 ]) 878 has_action = any(token in lowered for token in [ 879 "turn on", "turn off", "open", "close", "lock", "unlock", "dim", "set", 880 ]) 881 has_automation_intent = any(token in lowered for token in [ 882 "automate", "automation", "routine", "scene", "trigger", "schedule", 883 "presence", "motion", "door", "window", "sensor", "alarm", 884 "romantic", "cozy", "ambience", "ambiance", 885 ]) 886 has_home_context = any(token in lowered for token in [ 887 "home", "house", "apartment", "room", "living 
room", "bedroom", 888 "kitchen", "hallway", "garage", "porch", 889 ]) 890 891 return ( 892 (has_trigger and has_action) 893 or (has_trigger and has_automation_intent) 894 or (has_automation_intent and has_home_context) 895 ) 896 897 async def _classify_intent(self, text: str) -> str: 898 """ 899 Classify user intent as HA, PIPELINE, or OTHER using a single cheap LLM call. 900 Returns one of: 'HA', 'PIPELINE', 'OTHER' 901 """ 902 if not text or text.startswith("/"): 903 return "OTHER" 904 if self.llm is None: 905 return "OTHER" 906 try: 907 decision, _ = await asyncio.wait_for( 908 self.llm.complete( 909 messages=[{"role": "user", "content": text}], 910 system=self.INTENT_CLASSIFIER_PROMPT, 911 max_tokens=4, 912 ), 913 timeout=5.0, 914 ) 915 token = (decision or "").strip().upper().split()[0] if decision else "OTHER" 916 if token in ("HA", "PIPELINE", "OTHER"): 917 return token 918 return "OTHER" 919 except Exception as e: 920 logger.debug(f"[{self.name}] Intent classification failed: {e}") 921 return "OTHER" 922 923 async def _is_home_automation_request(self, text: str) -> bool: 924 # Keep for backward compat — delegates to _classify_intent 925 intent = await self._classify_intent(text) 926 return intent == "HA" 927 928 # ── User input ───────────────────────────────────────────────────────── 929 930 async def chat(self, user_message: str) -> str: 931 response = await super().chat(user_message) 932 # Fire-and-forget fact extraction — don't block the response 933 asyncio.create_task(self._extract_and_save_facts(user_message, response)) 934 return response 935 936 async def chat_stream(self, user_message: str): 937 full_response = [] 938 async for chunk in super().chat_stream(user_message): 939 if isinstance(chunk, dict): 940 yield chunk 941 else: 942 full_response.append(chunk) 943 yield chunk 944 # Extract facts from completed response 945 if full_response: 946 asyncio.create_task( 947 self._extract_and_save_facts(user_message, "".join(full_response)) 948 ) 949 950 
951 def _drain_notifications(self) -> str: 952 """Pop queued monitor notifications as a formatted prefix string.""" 953 if not self._pending_notifications: 954 return "" 955 icons = {"critical": "\U0001f534", "warning": "\U0001f7e1", "info": "\u2705"} 956 lines = [] 957 for n in self._pending_notifications: 958 icon = icons.get(n.get("severity", "warning"), "\u26a0\ufe0f") 959 lines.append(f"{icon} **System:** {n.get('message', '').strip()}") 960 self._pending_notifications.clear() 961 return "\n".join(lines) + "\n\n---\n\n" 962 963 async def process_user_input(self, text: str) -> str: 964 note_prefix = self._drain_notifications() 965 966 # ── Direct API intercepts — handle without LLM round-trip ────────── 967 stripped = text.strip().rstrip("()") 968 if stripped in ("main.list_nodes", "list_nodes", "/nodes"): 969 nodes = self.list_nodes() 970 if not nodes: 971 return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>." 972 import time as _t 973 lines = [] 974 for nd in sorted(nodes, key=lambda x: x["node"]): 975 status = "🟢 online" if nd["online"] else "🔴 offline" 976 agents = ", ".join(nd["agents"]) or "(no agents)" 977 age = int(_t.time() - nd["last_seen"]) 978 lines.append(f" {nd['node']:22s} {status} | agents: {agents} | last heartbeat: {age}s ago") 979 return note_prefix + "Remote nodes:\n" + "\n".join(lines) 980 981 if stripped.startswith("/topics"): 982 keyword = stripped[7:].strip().lstrip("(").rstrip(")") 983 topics = self.list_topics(keyword) 984 if not topics: 985 msg = f"No topics found" + (f" matching '{keyword}'" if keyword else "") + "." 986 msg += " Topics are registered automatically when agents publish for the first time." 
987 return note_prefix + msg 988 lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"] 989 for t in topics: 990 agent_strs = ", ".join( 991 f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "") 992 for a in t["agents"] 993 ) 994 lines.append(f" {t['topic']:40s} ← {agent_strs}") 995 return note_prefix + "\n".join(lines) 996 997 # ── Webhook / notification URL management ─────────────────────────── 998 if stripped.startswith("/memory"): 999 parts = stripped.split(None, 1) 1000 sub = parts[1].strip() if len(parts) > 1 else "" 1001 if sub == "clear": 1002 self.persist("_user_facts", {}) 1003 self.persist("history_summary", "") 1004 self._history_summary = "" 1005 self.system_prompt = ORCHESTRATOR_PROMPT 1006 return note_prefix + "Memory cleared — user facts and conversation summary reset." 1007 if sub.startswith("forget "): 1008 key = sub[7:].strip() 1009 facts = self.get_user_facts() 1010 if key in facts: 1011 del facts[key] 1012 self.persist("_user_facts", facts) 1013 self._inject_user_facts_into_prompt() 1014 return note_prefix + f"Forgotten: '{key}'" 1015 return note_prefix + f"No fact found with key '{key}'." 1016 # Default: show memory 1017 facts = self.get_user_facts() 1018 summary = self._history_summary 1019 lines = [] 1020 if facts: 1021 lines.append(f"User facts ({len(facts)}):") 1022 for k, v in facts.items(): 1023 lines.append(f" {k}: {v}") 1024 else: 1025 lines.append("No user facts stored yet.") 1026 if summary: 1027 lines.append(f"\nConversation summary:\n {summary[:300]}{'...' 
if len(summary) > 300 else ''}") 1028 else: 1029 lines.append("\nNo conversation summary yet.") 1030 lines.append("\nCommands: /memory clear | /memory forget <key>") 1031 return note_prefix + "\n".join(lines) 1032 1033 if stripped.startswith("/webhook"): 1034 parts = stripped.split(None, 2) 1035 if len(parts) == 1: 1036 # /webhook — show stored URLs 1037 urls = self.recall("_notification_urls") or {} 1038 if not urls: 1039 return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url> or /webhook telegram <url>" 1040 lines = ["Stored notification URLs:"] 1041 for svc, url in urls.items(): 1042 lines.append(f" {svc}: {url}") 1043 return note_prefix + "\n".join(lines) 1044 elif len(parts) >= 3: 1045 # /webhook discord <url> 1046 service = parts[1].lower() 1047 url = parts[2].strip() 1048 urls = self.recall("_notification_urls") or {} 1049 urls[service] = url 1050 self.persist("_notification_urls", urls) 1051 return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically." 1052 else: 1053 return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..." 
1054 1055 # Auto-detect webhook URLs in any message and persist them 1056 import re as _re 1057 _webhook_match = _re.search( 1058 r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+', 1059 text 1060 ) 1061 if _webhook_match: 1062 url = _webhook_match.group(0).rstrip(".,;!)'\"") 1063 urls = self.recall("_notification_urls") or {} 1064 if "discord" in url: 1065 urls["discord"] = url 1066 elif "slack" in url: 1067 urls["slack"] = url 1068 elif "telegram" in url: 1069 urls["telegram"] = url 1070 self.persist("_notification_urls", urls) 1071 logger.info(f"[{self.name}] Auto-saved webhook URL from message") 1072 1073 if stripped in ("/rules", "rules"): 1074 rules = self.get_pipeline_rules() 1075 if not rules: 1076 return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'." 1077 lines = [f"Active pipeline rules ({len(rules)}):"] 1078 for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)): 1079 agents = rule.get("agents", []) 1080 task = rule.get("task", "")[:80] 1081 import datetime 1082 ts = rule.get("created_at", 0) 1083 created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown" 1084 # Check which agents are running 1085 running_agents = [] 1086 stopped_agents = [] 1087 for a in agents: 1088 if self._registry and self._registry.find_by_name(a): 1089 running_agents.append(a) 1090 else: 1091 stopped_agents.append(a) 1092 status = "🟢" if running_agents else "🔴" 1093 lines.append(f"\n{status} [{rule_id}] — {task}") 1094 lines.append(f" agents : {', '.join(agents)}") 1095 if stopped_agents: 1096 lines.append(f" stopped : {', '.join(stopped_agents)}") 1097 lines.append(f" created : {created}") 1098 lines.append("\nTo delete a rule: /rules delete <rule_id>") 1099 return note_prefix + "\n".join(lines) 1100 1101 if stripped.startswith("/rules delete "): 1102 rule_id = stripped[len("/rules delete 
"):].strip() 1103 result = await self.delete_pipeline_rule(rule_id) 1104 return note_prefix + result 1105 1106 if stripped.startswith("/rules"): 1107 keyword = stripped[14:].strip().lstrip("(").rstrip(")") 1108 caps = self.list_capabilities(keyword) 1109 if not caps: 1110 msg = "No agents found" + (f" matching '{keyword}'" if keyword else "") + "." 1111 msg += " Agents publish their capabilities on startup." 1112 return note_prefix + msg 1113 lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"] 1114 for a in caps: 1115 lines.append("") 1116 lines.append(" [" + a["name"] + "]" + (" on " + a["node"] if a.get("node") else "")) 1117 lines.append(" description : " + a["description"]) 1118 if a["capabilities"]: 1119 lines.append(" capabilities: " + ", ".join(a["capabilities"])) 1120 if a["input_schema"]: 1121 lines.append(" input : " + str(a["input_schema"])) 1122 if a["output_schema"]: 1123 lines.append(" output : " + str(a["output_schema"])) 1124 return note_prefix + "\n".join(lines) 1125 1126 # ── @mention direct routing ───────────────────────────────────────── 1127 if text.startswith("@"): 1128 # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?" 
1129 parts = text.split(None, 1) 1130 target_name = parts[0].lstrip("@").rstrip(":,") 1131 message = parts[1].strip() if len(parts) > 1 else text 1132 1133 # Try local registry first 1134 local_target = self._registry.find_by_name(target_name) if self._registry else None 1135 if not local_target: 1136 # Not running — check if it's a spawnable catalog recipe 1137 manifest = self._agent_manifests.get(target_name, {}) 1138 if manifest.get("spawnable") and manifest.get("catalog"): 1139 catalog_name = manifest["catalog"] 1140 catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None 1141 if catalog_actor and hasattr(catalog_actor, "_action_spawn"): 1142 logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...") 1143 try: 1144 spawn_result = await catalog_actor._action_spawn(target_name, {}) 1145 if spawn_result and spawn_result.get("ok"): 1146 await asyncio.sleep(0.5) 1147 local_target = self._registry.find_by_name(target_name) if self._registry else None 1148 logger.info(f"[main] '{target_name}' spawned, routing task...") 1149 else: 1150 err = spawn_result.get("message", "unknown error") if spawn_result else "no response" 1151 return note_prefix + f"Could not spawn '{target_name}': {err}" 1152 except Exception as e: 1153 return note_prefix + f"Could not spawn '{target_name}': {e}" 1154 1155 if local_target: 1156 result = await self.delegate_task(target_name, message, timeout=60.0) 1157 if result: 1158 reply = result.get("result") or result.get("response") or str(result) 1159 return note_prefix + f"**{target_name}**: {reply}" 1160 return note_prefix + f"{target_name} did not respond." 
1161 1162 # Check if it's a known remote agent 1163 remote_node = None 1164 for node_name, nd in self._known_nodes.items(): 1165 if target_name in nd.get("agents", []): 1166 remote_node = node_name 1167 break 1168 1169 if remote_node: 1170 # Send via MQTT and wait for reply 1171 import time as _t 1172 reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}" 1173 future: asyncio.Future = asyncio.get_event_loop().create_future() 1174 self._result_futures[reply_topic] = future 1175 1176 await self._mqtt_publish( 1177 f"agents/by-name/{target_name}/task", 1178 {"text": message, "_reply_topic": reply_topic, 1179 "_remote_task": True, "payload": message}, 1180 ) 1181 1182 # Subscribe briefly for the reply 1183 async def _wait_reply(): 1184 try: 1185 import aiomqtt 1186 async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client: 1187 await client.subscribe(reply_topic) 1188 async for msg in client.messages: 1189 try: 1190 data = json.loads(msg.payload.decode()) 1191 if not future.done(): 1192 future.set_result(data) 1193 except Exception: 1194 pass 1195 return 1196 except Exception as e: 1197 if not future.done(): 1198 future.set_exception(e) 1199 1200 reply_task = asyncio.create_task(_wait_reply()) 1201 try: 1202 result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0) 1203 reply_task.cancel() 1204 reply = result.get("result") or result.get("response") or str(result) 1205 return note_prefix + f"**{target_name}** (on {remote_node}): {reply}" 1206 except asyncio.TimeoutError: 1207 reply_task.cancel() 1208 return note_prefix + f"{target_name} on {remote_node} did not respond within 30s." 1209 finally: 1210 self._result_futures.pop(reply_topic, None) 1211 1212 # Not found locally or remotely 1213 known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])] 1214 if known_remote: 1215 return note_prefix + (f"Agent '{target_name}' not found. 
" 1216 f"Remote agents: {', '.join(known_remote)}") 1217 return note_prefix + f"Agent '{target_name}' not found." 1218 1219 # Explicit planner prefix always wins 1220 lowered = text.lower() 1221 if any(lowered.startswith(p) for p in ( 1222 "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ", 1223 "@planner", "set up a pipeline", "create a rule", "set up a rule", 1224 )): 1225 result = await self._run_planner(text) 1226 return note_prefix + (result or "Planner did not return a result. Please retry.") 1227 1228 # Single LLM call classifies intent: HA (direct action), PIPELINE (reactive rule), OTHER 1229 intent = await self._classify_intent(text) 1230 logger.info(f"[{self.name}] Intent: {intent} — {text[:60]}") 1231 1232 if intent == "PIPELINE": 1233 result = await self._run_planner(text) 1234 return note_prefix + (result or "Planner did not return a result. Please retry.") 1235 1236 if intent == "HA": 1237 result = await self.delegate_task("home-assistant-agent", text, timeout=120.0) 1238 if result and isinstance(result, dict) and result.get("result"): 1239 return note_prefix + str(result["result"]) 1240 if not result: 1241 return note_prefix + "I could not reach the Home Assistant agent right now. Please retry." 1242 return note_prefix + "The Home Assistant agent did not return a result. Please retry." 1243 1244 response = await self.chat(text) 1245 1246 # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once 1247 has_spawn = "<spawn>" in response 1248 has_code = "async def handle_task" in response or "async def setup" in response 1249 asked_spawn = any(w in text.lower() for w in ("spawn", "create", "make", "build", "add", "agent")) 1250 if has_code and not has_spawn and asked_spawn: 1251 logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it") 1252 response = await self.chat( 1253 "You wrote agent code but forgot to wrap it in a <spawn> block. 
" 1254 "Please output the complete spawn block now with that exact code inside it. " 1255 "Output ONLY the <spawn>...</spawn> block, nothing else." 1256 ) 1257 1258 clean, spawned = await self._process_spawn_commands(response) 1259 1260 # Execute any @agent-name {payload} delegation patterns the LLM produced 1261 clean = await self._execute_llm_delegations(clean) 1262 1263 await self._mqtt_publish( 1264 f"agents/{self.actor_id}/logs", 1265 {"type": "user_interaction", "input": text[:100], "response": clean[:200]}, 1266 ) 1267 1268 if spawned: 1269 bg_names = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)] 1270 live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)] 1271 parts = [] 1272 if live_names: 1273 replaced = '"replace": true' in response or '"replace":true' in response 1274 action = "Replaced" if replaced else "Spawned" 1275 parts.append(f"{action} {', '.join(live_names)}") 1276 if bg_names: 1277 parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly") 1278 if parts: 1279 clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]" 1280 1281 return note_prefix + clean 1282 1283 async def process_user_input_stream(self, text: str): 1284 """ 1285 Streaming version of process_user_input(). 1286 Yields text chunks as the LLM generates them, then a final dict: 1287 {"done": True, "spawned": [...names...], "system_msg": "..."} 1288 1289 The CLI calls this and prints chunks immediately. 1290 REST/Discord/WhatsApp should use process_user_input() instead. 
1291 """ 1292 # Drain monitor notifications first 1293 note_prefix = self._drain_notifications() 1294 if note_prefix: 1295 yield note_prefix 1296 1297 # All slash-commands and direct API intercepts are handled by process_user_input 1298 # Route them there to avoid duplicating all that logic here 1299 _stripped = text.strip().rstrip("()") 1300 _is_command = ( 1301 _stripped.startswith("/") 1302 or _stripped in ("list_nodes", "main.list_nodes", "rules") 1303 or _stripped.startswith("@") 1304 ) 1305 if _is_command: 1306 result = await self.process_user_input(text) 1307 yield result 1308 yield {"done": True, "spawned": [], "system_msg": ""} 1309 return 1310 1311 # Explicit planner prefix always wins 1312 _lowered = text.lower() 1313 if any(_lowered.startswith(p) for p in ( 1314 "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ", 1315 "@planner", "set up a pipeline", "create a rule", "set up a rule", 1316 )): 1317 result = await self._run_planner(text) 1318 yield result or "Planner did not return a result. Please retry." 1319 yield {"done": True, "spawned": [], "system_msg": ""} 1320 return 1321 1322 # Single LLM call classifies intent: HA, PIPELINE, or OTHER 1323 intent = await self._classify_intent(text) 1324 logger.info(f"[{self.name}] Intent: {intent} — {text[:60]}") 1325 1326 if intent == "PIPELINE": 1327 result = await self._run_planner(text) 1328 yield result or "Planner did not return a result. Please retry." 1329 yield {"done": True, "spawned": [], "system_msg": ""} 1330 return 1331 1332 if intent == "HA": 1333 result = await self.delegate_task("home-assistant-agent", text, timeout=120.0) 1334 if result and isinstance(result, dict) and result.get("result"): 1335 yield str(result["result"]) 1336 elif not result: 1337 yield "I could not reach the Home Assistant agent right now. Please retry." 1338 else: 1339 yield "The Home Assistant agent did not return a result. Please retry." 
1340 yield {"done": True, "spawned": [], "system_msg": ""} 1341 return 1342 1343 # Stream the LLM response chunk by chunk 1344 full_chunks = [] 1345 async for chunk in self.chat_stream(text): 1346 if isinstance(chunk, dict): 1347 break # usage dict — discard, already tracked inside chat_stream 1348 full_chunks.append(chunk) 1349 yield chunk 1350 1351 full_response = "".join(full_chunks) 1352 1353 # Process any <spawn> blocks in the completed response 1354 _, spawned = await self._process_spawn_commands(full_response) 1355 1356 # Execute any @agent-name {payload} delegation patterns the LLM produced 1357 # If delegations ran, yield the results as an additional chunk 1358 delegated = await self._execute_llm_delegations(full_response) 1359 if delegated != full_response: 1360 # Find what changed and yield just the new parts 1361 import re as _re 1362 results = _re.findall(r'[✅❌]\s+\S+.*', delegated) 1363 if results: 1364 yield "\n" + "\n".join(results) 1365 full_response = delegated 1366 1367 system_msg = "" 1368 if spawned: 1369 names = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder)) 1370 bg_names = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)] 1371 parts = [] 1372 if names: 1373 replaced = '"replace": true' in full_response or '"replace":true' in full_response 1374 parts.append(f"{'Replaced' if replaced else 'Spawned'} {names} — will auto-restore on restart") 1375 if bg_names: 1376 parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly") 1377 system_msg = " | ".join(parts) 1378 1379 await self._mqtt_publish( 1380 f"agents/{self.actor_id}/logs", 1381 {"type": "user_interaction", "input": text[:100], "response": full_response[:200]}, 1382 ) 1383 1384 yield {"done": True, "spawned": spawned, "system_msg": system_msg} 1385 1386 # ── Planner ──────────────────────────────────────────────────────────── 1387 1388 _PLANNING_KEYWORDS = [ 1389 # Coordination signals 1390 "and then", "after that", 
"also", "combine", "compare", 1391 "coordinate", "plan", "pipeline", "orchestrate", "summarize both", 1392 "using multiple", "all agents", "several agents", 1393 # Multi-step / multi-domain signals 1394 "first.*then", "step by step", "in order", 1395 "weather.*news", "news.*weather", "manual.*code", "search.*analyze", 1396 # Reactive pipeline signals 1397 "if.*then", "when.*send", "when.*turn", "when.*open", "when.*close", 1398 "whenever", "monitor.*and", "watch.*and", "detect.*and", 1399 "notify me", "alert me", "automatically", 1400 ] 1401 1402 async def _needs_planning(self, text: str) -> bool: 1403 """ 1404 Heuristic: does this task benefit from multi-agent coordination? 1405 Keeps main fast — only escalates genuinely complex requests. 1406 """ 1407 import re 1408 lowered = text.lower() 1409 1410 # Explicit user request for coordination 1411 if any(w in lowered for w in ( 1412 "coordinate:", "plan:", "pipeline:", "@planner", 1413 "ask the planner", "use the planner", "create a pipeline", 1414 "set up a pipeline", "create a rule", "set up a rule", 1415 )): 1416 return True 1417 1418 # Keyword heuristic — multiple signals needed to avoid false positives 1419 hits = sum(1 for kw in self._PLANNING_KEYWORDS if re.search(kw, lowered)) 1420 if hits >= 2: 1421 return True 1422 1423 # References two or more known agent names 1424 if self._registry: 1425 agent_names = [a.name for a in self._registry.all_actors() 1426 if a.name not in {"main", "monitor", "installer"}] 1427 mentioned = sum(1 for name in agent_names if name in lowered) 1428 if mentioned >= 2: 1429 return True 1430 1431 return False 1432 1433 async def _run_planner(self, task: str) -> Optional[str]: 1434 """Spawn a PlannerAgent, hand it the task, wait for the result.""" 1435 from .planner_agent import PlannerAgent 1436 import uuid 1437 1438 # Enrich vague follow-up tasks with recent conversation context 1439 # so the planner has the full picture (e.g. 
which entity was found) 1440 enriched_task = task 1441 if self._conversation_history and len(task.split()) < 15: 1442 # Short/vague task — inject last 3 exchanges as context 1443 recent = self._conversation_history[-6:] # 3 user+assistant pairs 1444 ctx_lines = [] 1445 for m in recent: 1446 role = "User" if m["role"] == "user" else "Assistant" 1447 content = str(m["content"])[:300] 1448 ctx_lines.append(f"{role}: {content}") 1449 if ctx_lines: 1450 enriched_task = ( 1451 f"{task}\n\n" 1452 f"[Context from recent conversation:]\n" 1453 + "\n".join(ctx_lines) 1454 ) 1455 1456 planner_name = f"planner-{uuid.uuid4().hex[:6]}" 1457 logger.info(f"[{self.name}] Spawning planner '{planner_name}' for: {enriched_task[:60]}") 1458 1459 await self._mqtt_publish( 1460 f"agents/{self.actor_id}/logs", 1461 {"type": "log", "message": f"Complex task detected — spawning planner...", "timestamp": __import__('time').time()}, 1462 ) 1463 1464 task_id = f"plan_{uuid.uuid4().hex[:8]}" 1465 future: asyncio.Future = asyncio.get_running_loop().create_future() 1466 self._result_futures[task_id] = future 1467 1468 try: 1469 planner = await self.spawn( 1470 PlannerAgent, 1471 name=planner_name, 1472 llm_provider=self.llm, 1473 task=enriched_task, 1474 reply_to_id=self.actor_id, 1475 reply_task_id=task_id, 1476 auto_terminate=True, 1477 persistence_dir=str(self._persistence_dir.parent), 1478 ) 1479 if not planner: 1480 return None 1481 1482 result_payload = await asyncio.wait_for(future, timeout=180.0) 1483 answer = result_payload.get("result") or result_payload.get("text") or "" 1484 spawned_names = result_payload.get("spawned", []) 1485 if spawned_names: 1486 answer += f"\n\n[System: Planner created new agents: {', '.join(spawned_names)} — saved for future use]" 1487 return answer 1488 1489 except asyncio.TimeoutError: 1490 logger.warning(f"[{self.name}] Planner timed out for: {task[:60]}") 1491 return "The pipeline is taking longer than expected to set up. 
Check `/rules` in a moment to see if agents were spawned, or try again." 1492 except Exception as e: 1493 logger.error(f"[{self.name}] Planner error: {e}") 1494 return None 1495 finally: 1496 self._result_futures.pop(task_id, None) 1497 1498 # ── Spawn ────────────────────────────────────────────────────────────── 1499 1500 async def _execute_llm_delegations(self, response: str) -> str: 1501 """ 1502 Scan the LLM response for @agent-name {json} delegation patterns and execute them. 1503 Replaces the pattern in the response with the actual result. 1504 1505 Matches lines like: 1506 @doc-to-pptx-agent {"file_path": "...", "output_path": "..."} 1507 @weather-agent {"city": "Athens"} 1508 """ 1509 import re 1510 1511 # Find @agent-name then scan for the matching { } block manually 1512 # (regex alone can't handle } inside string values reliably) 1513 delegations = [] # list of (full_match_str, agent_name, payload_dict) 1514 1515 for m in re.finditer(r'@([\w][\w\-]*)\s+(\{)', response): 1516 agent_name = m.group(1) 1517 if agent_name == self.name: 1518 continue 1519 start = m.start(2) # position of opening { 1520 depth = 0 1521 end = start 1522 for i, ch in enumerate(response[start:], start): 1523 if ch == '{': 1524 depth += 1 1525 elif ch == '}': 1526 depth -= 1 1527 if depth == 0: 1528 end = i + 1 1529 break 1530 if depth != 0: 1531 continue # unmatched braces — skip 1532 json_str = response[start:end] 1533 try: 1534 payload = json.loads(json_str) 1535 except json.JSONDecodeError: 1536 continue 1537 delegations.append((response[m.start():end], agent_name, payload)) 1538 1539 replacements = [] 1540 for full_match, agent_name, payload in delegations: 1541 # Check if agent is running, if not auto-spawn via catalog 1542 target = self._registry.find_by_name(agent_name) if self._registry else None 1543 if not target: 1544 manifest = self._agent_manifests.get(agent_name, {}) 1545 if manifest.get("spawnable") and manifest.get("catalog"): 1546 catalog_actor = 
self._registry.find_by_name(manifest["catalog"]) if self._registry else None 1547 if catalog_actor and hasattr(catalog_actor, "_action_spawn"): 1548 logger.info(f"[{self.name}] Auto-spawning '{agent_name}' via catalog...") 1549 try: 1550 spawn_result = await catalog_actor._action_spawn(agent_name, {}) 1551 if spawn_result and spawn_result.get("ok"): 1552 await asyncio.sleep(0.5) 1553 target = self._registry.find_by_name(agent_name) if self._registry else None 1554 logger.info(f"[{self.name}] '{agent_name}' spawned successfully") 1555 else: 1556 err = spawn_result.get("message", "unknown") if spawn_result else "no response" 1557 logger.warning(f"[{self.name}] Spawn failed for '{agent_name}': {err}") 1558 except Exception as e: 1559 logger.error(f"[{self.name}] Spawn error for '{agent_name}': {e}") 1560 1561 if not target: 1562 replacements.append((full_match, f"[Could not reach {agent_name}]")) 1563 continue 1564 1565 json_str = json.dumps(payload) 1566 logger.info(f"[{self.name}] Executing LLM delegation → @{agent_name} {json_str[:80]}") 1567 try: 1568 result = await self.delegate_task(agent_name, json_str, timeout=300.0) 1569 if result: 1570 if isinstance(result, dict): 1571 error = result.get("error") 1572 if error: 1573 result_str = f"❌ {agent_name} failed: {error}" 1574 else: 1575 for key in ("pptx_path", "image_path", "result", "message", "output", "text"): 1576 if result.get(key): 1577 result_str = f"✅ {agent_name} completed: {key}={result[key]}" 1578 break 1579 else: 1580 result_str = f"✅ {agent_name} completed: {result}" 1581 else: 1582 result_str = f"✅ {agent_name}: {result}" 1583 else: 1584 result_str = f"[{agent_name} did not respond]" 1585 except Exception as e: 1586 result_str = f"[{agent_name} error: {e}]" 1587 1588 replacements.append((full_match, result_str)) 1589 1590 # Apply replacements 1591 for original, replacement in replacements: 1592 response = response.replace(original, replacement) 1593 1594 return response 1595 1596 @staticmethod 1597 def 
_parse_spawn_config(raw: str) -> dict: 1598 """ 1599 Robustly parse a spawn config that may contain raw multiline code strings. 1600 Uses character scanning to correctly handle } and " inside the code value. 1601 """ 1602 raw = raw.strip() 1603 1604 # Strategy 1: standard JSON (works when LLM properly escapes newlines) 1605 try: 1606 return json.loads(raw) 1607 except json.JSONDecodeError: 1608 pass 1609 1610 # Strategy 2: backtick-delimited code (rare but some LLMs use it) 1611 bt_match = re.search(r'"code"\s*:\s*`(.*?)`', raw, re.DOTALL) 1612 if bt_match: 1613 code_raw = bt_match.group(1) 1614 placeholder = re.sub(r'"code"\s*:\s*`.*?`', '"code": "__CODE__"', raw, flags=re.DOTALL) 1615 config = json.loads(placeholder) 1616 config["code"] = code_raw 1617 return config 1618 1619 # Strategy 3: character scanner — find opening " after "code": 1620 # then scan forward respecting escape sequences to find the real closing " 1621 # This correctly handles } and { inside the code value. 1622 key_match = re.search(r'"code"\s*:\s*"', raw) 1623 if not key_match: 1624 raise ValueError(f"No 'code' key found in spawn config:\n{raw[:200]}") 1625 1626 code_start = key_match.end() # index right after the opening " 1627 i = code_start 1628 while i < len(raw): 1629 if raw[i] == '\\': 1630 i += 2 # skip escaped character 1631 continue 1632 if raw[i] == '"': 1633 break # found unescaped closing quote 1634 i += 1 1635 1636 code_raw = raw[code_start:i] 1637 placeholder = raw[:key_match.start()] + '"code": "__CODE__"' + raw[i+1:] 1638 1639 try: 1640 config = json.loads(placeholder) 1641 except json.JSONDecodeError as e: 1642 raise ValueError(f"Spawn config JSON invalid after code extraction: {e}\nPlaceholder:\n{placeholder[:300]}") 1643 1644 # Unescape sequences the LLM may have added 1645 config["code"] = (code_raw 1646 .replace("\\n", "\n") 1647 .replace('\\"', '"') 1648 .replace("\\t", "\t")) 1649 return config 1650 1651 async def _process_spawn_commands(self, response: str): 1652 
spawned = [] 1653 pattern = r'<spawn>(.*?)</spawn>' 1654 1655 for match in re.findall(pattern, response, re.DOTALL): 1656 try: 1657 config = self._parse_spawn_config(match.strip()) 1658 # LLM agents have no "code" — only check for code if type is dynamic 1659 agent_type = config.get("type", "dynamic") 1660 has_code = bool(config.get("code", "").strip()) 1661 has_prompt = bool(config.get("system_prompt", "").strip()) 1662 if agent_type == "dynamic" and not has_code: 1663 logger.error(f"[{self.name}] Dynamic agent has no code: {config.get('name')}") 1664 continue 1665 if agent_type == "llm" and not has_prompt: 1666 logger.warning(f"[{self.name}] LLM agent has no system_prompt, using default: {config.get('name')}") 1667 actor = await self._spawn_from_config(config, save=True) 1668 if actor: 1669 spawned.append(actor) 1670 except Exception as e: 1671 logger.error(f"[{self.name}] Spawn failed: {e}\nRaw block:\n{match[:500]}") 1672 1673 clean = re.sub(pattern, '', response, flags=re.DOTALL).strip() 1674 return clean, spawned 1675 1676 async def _spawn_from_config(self, config: dict, save: bool = True) -> Optional[Actor]: 1677 name = config.get("name", "dynamic-agent") 1678 node = config.get("node", "").strip() 1679 1680 # Remote spawn — publish to the node's spawn topic via MQTT 1681 if node: 1682 return await self._spawn_remote(config, node, save) 1683 1684 # Local spawn 1685 from .dynamic_agent import DynamicAgent 1686 1687 existing = self._registry.find_by_name(name) if self._registry else None 1688 replace = config.get("replace", False) 1689 1690 if existing: 1691 if not replace: 1692 logger.info(f"[{self.name}] '{name}' already exists (use replace=true to update).") 1693 return existing 1694 # Stop the old agent cleanly before spawning the replacement 1695 logger.info(f"[{self.name}] Replacing '{name}' with updated code...") 1696 try: 1697 if self._registry: 1698 await self._registry.unregister(existing.actor_id) 1699 await existing.stop() 1700 await 
asyncio.sleep(0.5) 1701 except Exception as e: 1702 logger.warning(f"[{self.name}] Error stopping old '{name}': {e}") 1703 1704 agent_type = config.get("type", "dynamic") 1705 code = config.get("code", "").strip() 1706 system_prompt = config.get("system_prompt", "").strip() 1707 1708 # Route to the right agent class 1709 if agent_type == "ha_actuator": 1710 actor = await self._spawn_ha_actuator(config, name) 1711 elif agent_type == "manual" or name == "manual-agent": 1712 actor = await self._spawn_manual_agent(config, name) 1713 elif agent_type == "llm" or (not code and system_prompt): 1714 actor = await self._spawn_llm_agent(config, name) 1715 elif code: 1716 actor = await self._spawn_dynamic_agent(config, name, code) 1717 else: 1718 logger.warning(f"[{self.name}] Spawn config for '{name}' has neither code nor system_prompt.") 1719 return None 1720 1721 if actor and save: 1722 self._save_to_spawn_registry(config) 1723 1724 return actor 1725 1726 async def _spawn_ha_actuator(self, config: dict, name: str): 1727 """Spawn a HomeAssistantActuatorAgent from a spawn block with type: ha_actuator.""" 1728 from .home_assistant_actuator_agent import ( 1729 HomeAssistantActuatorAgent, ActuatorConfig, ActuatorAction, ActuatorCondition, 1730 ) 1731 import hashlib as _hl 1732 1733 # Ensure unique name if collision 1734 if self._registry and self._registry.find_by_name(name): 1735 suffix = _hl.md5(f"{name}{__import__('time').time()}".encode()).hexdigest()[:4] 1736 name = f"{name}-{suffix}" 1737 1738 automation_id = config.get("automation_id", name) 1739 actuator_cfg = ActuatorConfig( 1740 automation_id = automation_id, 1741 description = config.get("description", ""), 1742 mqtt_topics = config.get("mqtt_topics", []), 1743 actions = [ActuatorAction.from_dict(a) for a in config.get("actions", [])], 1744 conditions = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])], 1745 detection_filter = config.get("detection_filter"), 1746 cooldown_seconds = 
float(config.get("cooldown_seconds", 10.0)), 1747 ) 1748 logger.info(f"[{self.name}] Spawning HomeAssistantActuatorAgent '{name}'") 1749 actor = await self.spawn( 1750 HomeAssistantActuatorAgent, 1751 config = actuator_cfg, 1752 name = name, 1753 persistence_dir = str(self._persistence_dir.parent), 1754 ) 1755 return actor 1756 1757 async def _spawn_manual_agent(self, config: dict, name: str): 1758 """Spawn the pre-defined ManualAgent — robust PDF manual search and Q&A.""" 1759 from .manual_agent import ManualAgent 1760 logger.info(f"[{self.name}] Spawning ManualAgent '{name}'") 1761 actor = await self.spawn( 1762 ManualAgent, 1763 name=name, 1764 llm_provider=self.llm, 1765 persistence_dir=str(self._persistence_dir.parent), 1766 ) 1767 return actor 1768 1769 async def _spawn_llm_agent(self, config: dict, name: str): 1770 """Spawn a proper LLMAgent — best for chat, Q&A, reasoning tasks.""" 1771 from .llm_agent import LLMAgent 1772 system_prompt = config.get("system_prompt", "You are a helpful assistant.") 1773 logger.info(f"[{self.name}] Spawning LLM agent '{name}'") 1774 actor = await self.spawn( 1775 LLMAgent, 1776 name=name, 1777 llm_provider=self.llm, 1778 system_prompt=system_prompt, 1779 persistence_dir=str(self._persistence_dir.parent), 1780 ) 1781 return actor 1782 1783 async def _spawn_dynamic_agent(self, config: dict, name: str, code: str): 1784 """Spawn a DynamicAgent — best for data pipelines, sensors, tools.""" 1785 packages = config.get("install", []) 1786 if isinstance(packages, str): 1787 packages = [p.strip() for p in packages.replace(",", " ").split()] 1788 1789 if packages: 1790 # Install and spawn in a background task so we don't block the user 1791 logger.info(f"[{self.name}] Scheduling background install+spawn for '{name}': {packages}") 1792 asyncio.create_task(self._install_then_spawn(config, name, code, packages)) 1793 # Return a placeholder so the caller knows spawn is in progress 1794 return _SpawnPlaceholder(name) 1795 else: 1796 return 
await self._do_spawn_dynamic(config, name, code) 1797 1798 async def _install_then_spawn(self, config: dict, name: str, code: str, packages: list): 1799 """Background task: install packages then spawn the agent.""" 1800 try: 1801 await self._mqtt_publish( 1802 f"agents/{self.actor_id}/logs", 1803 {"type": "log", "message": f"Installing {packages} for {name}...", "timestamp": __import__("time").time()}, 1804 ) 1805 await self._install_packages(packages) 1806 actor = await self._do_spawn_dynamic(config, name, code) 1807 if actor: 1808 self._save_to_spawn_registry(config) 1809 await self._mqtt_publish( 1810 f"agents/{self.actor_id}/logs", 1811 {"type": "spawned", "message": f"'{name}' spawned after install", "child_name": name, "timestamp": __import__("time").time()}, 1812 ) 1813 logger.info(f"[{self.name}] Background spawn complete: {name}") 1814 except Exception as e: 1815 logger.error(f"[{self.name}] Background install+spawn failed for '{name}': {e}") 1816 1817 async def _do_spawn_dynamic(self, config: dict, name: str, code: str): 1818 """Actually create and start the DynamicAgent.""" 1819 from .dynamic_agent import DynamicAgent 1820 actor = await self.spawn( 1821 DynamicAgent, 1822 name=name, 1823 code=code, 1824 poll_interval=float(config.get("poll_interval", 1.0)), 1825 description=config.get("description", ""), 1826 input_schema=config.get("input_schema", {}), 1827 output_schema=config.get("output_schema", {}), 1828 llm_provider=self.llm, 1829 persistence_dir=str(self._persistence_dir.parent), 1830 ) 1831 return actor 1832 1833 async def _install_packages(self, packages: list[str]): 1834 """Delegate package installation to the installer agent.""" 1835 if not self._registry: 1836 return 1837 1838 # Fast path: check which packages actually need installing 1839 import importlib, sys 1840 needed = [] 1841 for pkg in packages: 1842 import_name = pkg.replace("-", "_").split("[")[0] 1843 try: 1844 importlib.import_module(import_name) 1845 except ImportError: 1846 
needed.append(pkg) 1847 if not needed: 1848 logger.info(f"[{self.name}] All packages already available: {packages} — skipping install") 1849 return 1850 1851 installer = self._registry.find_by_name("installer") 1852 if not installer: 1853 logger.warning(f"[{self.name}] installer agent not found — skipping install of {needed}") 1854 return 1855 logger.info(f"[{self.name}] Installing packages via installer: {needed}") 1856 import uuid 1857 task_id = f"install_{uuid.uuid4().hex[:8]}" 1858 future = asyncio.get_event_loop().create_future() 1859 self._result_futures[task_id] = future 1860 await self.send(installer.actor_id, MessageType.TASK, { 1861 "action": "install", 1862 "packages": needed, 1863 "task": task_id, 1864 "_task_id": task_id, 1865 "reply_to": self.actor_id, 1866 }) 1867 try: 1868 result = await asyncio.wait_for(future, timeout=120.0) 1869 logger.info(f"[{self.name}] Install result: {result.get('message', result)}") 1870 if result.get("failed"): 1871 logger.warning(f"[{self.name}] Failed to install: {result['failed']}") 1872 except asyncio.TimeoutError: 1873 logger.warning(f"[{self.name}] Package install timed out for {needed}") 1874 finally: 1875 self._result_futures.pop(task_id, None) 1876 1877 async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict: 1878 """ 1879 Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline. 1880 Returns the final synthesised result without blocking main's context. 
1881 1882 Usage: 1883 result = await main.run_pipeline( 1884 goal="Find the Philips EP2220 manual and answer: how do I descale it?", 1885 agents=["manual-agent", "installer"] 1886 ) 1887 """ 1888 from .task_manager import TaskManager 1889 import uuid 1890 1891 task_id = uuid.uuid4().hex[:8] 1892 future = asyncio.get_event_loop().create_future() 1893 self._result_futures[task_id] = future 1894 1895 mgr = await self.spawn( 1896 TaskManager, 1897 goal=goal, 1898 available_agents=agents, 1899 llm_provider=self.llm, 1900 reply_to_id=self.actor_id, 1901 reply_task_id=task_id, 1902 auto_destroy=True, 1903 force_replan=force_replan, 1904 cache_dir=str(self._persistence_dir.parent / "plan_cache"), 1905 persistence_dir=str(self._persistence_dir.parent), 1906 ) 1907 1908 logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}") 1909 1910 try: 1911 result = await asyncio.wait_for(future, timeout=timeout) 1912 return result 1913 except asyncio.TimeoutError: 1914 logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s") 1915 return {"error": f"Pipeline timed out after {timeout}s"} 1916 finally: 1917 self._result_futures.pop(task_id, None) 1918 1919 async def _spawn_remote(self, config: dict, node: str, save: bool) -> None: 1920 """ 1921 Publish a spawn command to a remote node via MQTT. 1922 The remote_runner.py on that machine will receive it and run the agent. 1923 Remote agents appear in the dashboard exactly like local ones 1924 because they connect to the same MQTT broker. 1925 1926 Also updates nodes/{node}/desired_state (retained) with ALL agents for 1927 this node so the runner can self-heal after a reboot. 
1928 """ 1929 name = config.get("name", "remote-agent") 1930 logger.info(f"[{self.name}] Spawning '{name}' on remote node '{node}'") 1931 1932 # Publish individual spawn (for immediate delivery) 1933 await self._mqtt_publish( 1934 f"nodes/{node}/spawn", 1935 config, 1936 retain=True, 1937 qos=1, 1938 ) 1939 1940 # Update desired state for the whole node (retained — survives Pi reboot) 1941 await self._update_node_desired_state(node, config) 1942 1943 await self._mqtt_publish( 1944 f"agents/{self.actor_id}/logs", 1945 {"type": "spawned", "message": f"Spawned '{name}' on node '{node}'", 1946 "child_name": name, "node": node, "timestamp": __import__("time").time()} 1947 ) 1948 1949 if save: 1950 self._save_to_spawn_registry(config) 1951 1952 return None 1953 1954 async def _update_node_desired_state(self, node: str, new_config: dict = None, 1955 remove_name: str = None) -> None: 1956 """ 1957 Maintain nodes/{node}/desired_state as a retained MQTT message containing 1958 ALL agents that should run on this node. The runner reads this on startup 1959 and reconciles — spawning missing agents, ignoring already-running ones. 
1960 """ 1961 # Build desired state from spawn registry filtered to this node 1962 reg = self._get_spawn_registry() 1963 agents = { 1964 name: cfg for name, cfg in reg.items() 1965 if cfg.get("node", "").strip() == node 1966 } 1967 1968 # Apply pending change before publishing 1969 if new_config: 1970 agents[new_config["name"]] = new_config 1971 if remove_name: 1972 agents.pop(remove_name, None) 1973 1974 await self._mqtt_publish( 1975 f"nodes/{node}/desired_state", 1976 {"node": node, "agents": list(agents.values()), 1977 "timestamp": __import__("time").time()}, 1978 retain=True, 1979 qos=1, 1980 ) 1981 logger.info(f"[{self.name}] Desired state for '{node}': {list(agents.keys())}") 1982 1983 # ── Node registry ────────────────────────────────────────────────────── 1984 1985 def list_nodes(self) -> list[dict]: 1986 """Return all known remote nodes with their last-seen time and running agents.""" 1987 import time as _time 1988 now = _time.time() 1989 return [ 1990 { 1991 "node": name, 1992 "agents": info.get("agents", []), 1993 "last_seen": info.get("last_seen", 0), 1994 "online": (now - info.get("last_seen", 0)) < 30, 1995 } 1996 for name, info in self._known_nodes.items() 1997 ] 1998 1999 def list_topics(self, keyword: str = "") -> list[dict]: 2000 """ 2001 Return all known MQTT topics published by agents, optionally filtered by keyword. 
2002 Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]} 2003 2004 Example: 2005 list_topics("cpu") → topics containing "cpu" 2006 list_topics("temp") → topics containing "temp" 2007 list_topics() → all topics 2008 """ 2009 results = [] 2010 kw = keyword.lower() 2011 for topic, manifests in self._topic_registry.items(): 2012 if kw and kw not in topic.lower(): 2013 continue 2014 results.append({ 2015 "topic": topic, 2016 "agents": [{"name": m.get("name"), "node": m.get("node"), 2017 "description": m.get("description", "")} for m in manifests], 2018 }) 2019 return sorted(results, key=lambda x: x["topic"]) 2020 2021 def list_capabilities(self, keyword: str = "") -> list[dict]: 2022 """ 2023 Return all known agents with their full capability profile: 2024 name, description, capabilities, input_schema, output_schema. 2025 2026 Example: 2027 list_capabilities() → all agents 2028 list_capabilities("weather") → agents with "weather" in description/capabilities 2029 """ 2030 results = [] 2031 kw = keyword.lower().strip() 2032 # Support multi-word keywords — match if ANY word appears in the haystack 2033 kw_words = kw.split() if kw else [] 2034 for name, manifest in self._agent_manifests.items(): 2035 desc = manifest.get("description", "") 2036 caps = manifest.get("capabilities", []) 2037 # Filter by keyword across description, capabilities, and name 2038 if kw_words: 2039 haystack = desc.lower() + " " + " ".join(caps).lower() + " " + name.lower() 2040 if not any(w in haystack for w in kw_words): 2041 continue 2042 results.append({ 2043 "name": name, 2044 "node": manifest.get("node"), 2045 "description": desc, 2046 "capabilities": caps, 2047 "input_schema": manifest.get("input_schema", {}), 2048 "output_schema": manifest.get("output_schema", {}), 2049 "spawnable": manifest.get("spawnable", False), 2050 "running": bool(self._registry and self._registry.find_by_name(name)), 2051 }) 2052 return sorted(results, key=lambda x: x["name"]) 2053 2054 async 
def _manifest_listener(self): 2055 """ 2056 Subscribe to agents/+/manifest and build a searchable topic registry. 2057 Retained manifests are delivered immediately on subscribe so the registry 2058 is populated even for agents that started before main restarted. 2059 """ 2060 try: 2061 import aiomqtt 2062 except ImportError: 2063 return 2064 2065 while self.state.value not in ("stopped", "failed"): 2066 try: 2067 async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client: 2068 await client.subscribe("agents/+/manifest") 2069 logger.info("[main] Subscribed to agent manifests.") 2070 async for msg in client.messages: 2071 try: 2072 data = json.loads(msg.payload.decode()) 2073 except Exception: 2074 continue 2075 if not isinstance(data, dict): 2076 continue 2077 agent_name = data.get("name", "?") 2078 published = data.get("publishes", []) 2079 # Update topic registry 2080 for topic in published: 2081 existing = self._topic_registry.setdefault(topic, []) 2082 # Replace existing entry for this agent or append 2083 updated = False 2084 for i, m in enumerate(existing): 2085 if m.get("name") == agent_name: 2086 existing[i] = data 2087 updated = True 2088 break 2089 if not updated: 2090 existing.append(data) 2091 # Also store full manifest by agent name for capability queries 2092 self._agent_manifests[agent_name] = data 2093 logger.debug(f"[main] Manifest from '{agent_name}': {published}") 2094 except asyncio.CancelledError: 2095 break 2096 except Exception as e: 2097 if self.state.value not in ("stopped", "failed"): 2098 logger.warning(f"[main] Manifest listener error: {e}. Reconnecting in 5s…") 2099 await asyncio.sleep(5) 2100 2101 async def migrate_agent(self, agent_name: str, target_node: str) -> dict: 2102 """ 2103 Move a running agent to a different node. 2104 2105 If the agent is local: saves updated config (with new node) and re-spawns remotely. 2106 If the agent is remote: publishes a migrate command to its current node. 
2107 Returns {"success": bool, "message": str} 2108 """ 2109 import time as _time 2110 2111 reg = self._get_spawn_registry() 2112 config = reg.get(agent_name) 2113 if not config: 2114 return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."} 2115 2116 current_node = config.get("node", "").strip() 2117 2118 if current_node == target_node: 2119 return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."} 2120 2121 if current_node: 2122 # ── Remote → Remote migration ──────────────────────────────────── 2123 logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'") 2124 await self._mqtt_publish( 2125 f"nodes/{current_node}/migrate", 2126 {"name": agent_name, "target_node": target_node}, 2127 ) 2128 else: 2129 # ── Local → Remote migration ───────────────────────────────────── 2130 logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'") 2131 2132 # Stop the local instance 2133 if self._registry: 2134 local = self._registry.find_by_name(agent_name) 2135 if local: 2136 try: 2137 await self._registry.unregister(local.actor_id) 2138 await local.stop() 2139 await asyncio.sleep(0.3) 2140 except Exception as e: 2141 logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}") 2142 2143 # Update config with new node target and re-spawn remotely 2144 new_config = dict(config) 2145 new_config["node"] = target_node 2146 new_config.pop("replace", None) 2147 2148 await self._spawn_remote(new_config, target_node, save=True) 2149 2150 # Update spawn registry so next restart re-spawns to the right node 2151 updated = dict(config) 2152 updated["node"] = target_node 2153 self._save_to_spawn_registry(updated) 2154 2155 msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' " 2156 f"→ '{target_node}'. 
It will appear in the dashboard shortly.") 2157 logger.info(f"[{self.name}] {msg}") 2158 return {"success": True, "message": msg} 2159 2160 async def _node_heartbeat_listener(self): 2161 """ 2162 Subscribe to nodes/+/heartbeat so main knows which remote nodes are online. 2163 Updates self._known_nodes which is used by list_nodes() and the LLM context. 2164 """ 2165 try: 2166 import aiomqtt 2167 except ImportError: 2168 logger.warning("[main] aiomqtt not available — node heartbeat tracking disabled.") 2169 return 2170 2171 while self.state.value not in ("stopped", "failed"): 2172 try: 2173 async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client: 2174 await client.subscribe("nodes/+/heartbeat") 2175 await client.subscribe("nodes/+/migrate_result") 2176 logger.info("[main] Subscribed to node heartbeats.") 2177 async for msg in client.messages: 2178 topic = str(msg.topic) 2179 try: 2180 data = json.loads(msg.payload.decode()) 2181 except Exception: 2182 continue 2183 2184 parts = topic.split("/") 2185 if len(parts) < 3: 2186 continue 2187 node_name = parts[1] 2188 2189 if topic.endswith("/heartbeat"): 2190 import time as _t 2191 self._known_nodes[node_name] = { 2192 "last_seen": _t.time(), 2193 "agents": data.get("agents", []), 2194 "node_id": data.get("node_id", ""), 2195 } 2196 elif topic.endswith("/migrate_result"): 2197 success = data.get("success", False) 2198 agent = data.get("agent", "?") 2199 to_node = data.get("to_node", "?") 2200 sev = "info" if success else "warning" 2201 self._pending_notifications.append({ 2202 "_monitor_notification": True, 2203 "message": ( 2204 f"Migration of '{agent}' to '{to_node}' succeeded." 
2205 if success else 2206 f"Migration of '{agent}' failed: {data.get('error', '?')}" 2207 ), 2208 "severity": sev, 2209 "timestamp": __import__("time").time(), 2210 }) 2211 2212 except asyncio.CancelledError: 2213 break 2214 except Exception as e: 2215 if self.state.value not in ("stopped", "failed"): 2216 logger.warning(f"[main] Node heartbeat listener error: {e}. Reconnecting in 5s…") 2217 await asyncio.sleep(5) 2218 2219 # ── Delegation ───────────────────────────────────────────────────────── 2220 2221 async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict: 2222 """ 2223 Send a task to the installer agent and wait for the result. 2224 Handles node_deploy, node_install, node_run, install, check actions. 2225 timeout is generous (300s) because deploys involve SSH + pip installs. 2226 """ 2227 if not self._registry: 2228 return {"error": "No registry available"} 2229 installer = self._registry.find_by_name("installer") 2230 if not installer: 2231 return {"error": "installer agent not found"} 2232 2233 import uuid as _uuid 2234 task_id = f"inst_{_uuid.uuid4().hex[:8]}" 2235 future: asyncio.Future = asyncio.get_event_loop().create_future() 2236 self._result_futures[task_id] = future 2237 2238 payload = dict(payload) 2239 payload["_task_id"] = task_id 2240 payload["task"] = task_id 2241 2242 await self.send(installer.actor_id, MessageType.TASK, payload) 2243 try: 2244 return await asyncio.wait_for(future, timeout=timeout) 2245 except asyncio.TimeoutError: 2246 return {"error": f"Installer timed out after {timeout}s"} 2247 finally: 2248 self._result_futures.pop(task_id, None) 2249 2250 async def delegate_task(self, target_name: str, task: str, timeout: float = 60.0) -> Optional[dict]: 2251 if not self._registry: 2252 return None 2253 target = self._registry.find_by_name(target_name) 2254 if not target: 2255 return None 2256 future = asyncio.get_event_loop().create_future() 2257 self._result_futures[task] = future 2258 await 
self.send(target.actor_id, MessageType.TASK, {"text": task, "reply_to": self.actor_id}) 2259 try: 2260 return await asyncio.wait_for(future, timeout=timeout) 2261 except asyncio.TimeoutError: 2262 return None 2263 finally: 2264 self._result_futures.pop(task, None) 2265 2266 async def list_agents(self) -> list[dict]: 2267 if not self._registry: 2268 return [] 2269 return [a.get_status() for a in self._registry.all_actors()] 2270 2271 async def send_command(self, target_name: str, command: MessageType): 2272 if not self._registry: 2273 return 2274 target = self._registry.find_by_name(target_name) 2275 if target: 2276 await self.send(target.actor_id, command) 2277 2278 async def delete_spawned_agent(self, name: str): 2279 # Find node before removing from registry 2280 reg = self._get_spawn_registry() 2281 node = reg.get(name, {}).get("node", "").strip() 2282 2283 self._remove_from_spawn_registry(name) 2284 2285 # Update desired state so Pi doesn't re-spawn on reconcile 2286 if node: 2287 await self._update_node_desired_state(node, remove_name=name) 2288 await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1) 2289 2290 if self._registry: 2291 target = self._registry.find_by_name(name) 2292 if target: 2293 await self._registry.unregister(target.actor_id) 2294 await target.stop()
An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.
def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs):
    """Create the orchestrator actor.

    The actor name defaults to "main" and the system prompt to
    ORCHESTRATOR_PROMPT unless the caller passes its own values in
    ``kwargs``; everything else is forwarded to the parent constructor.
    """
    defaults = (("name", "main"), ("system_prompt", ORCHESTRATOR_PROMPT))
    for option, fallback in defaults:
        kwargs.setdefault(option, fallback)
    super().__init__(llm_provider=llm_provider, **kwargs)

    # The parent constructor has run; mark this actor as protected.
    self.protected = True

    # Futures awaiting a RESULT message, keyed by task id.
    self._result_futures: dict[str, asyncio.Future] = {}

    # Queued monitor notifications — prepended to next user response.
    self._pending_notifications: list[dict] = []

    # Remote node tracking: node_name → {"last_seen": float, "agents": [...]}
    self._known_nodes: dict[str, dict] = {}

    # Topic registry built from agents/+/manifest: topic → list of agent manifests.
    self._topic_registry: dict[str, list] = {}

    # Agent name → latest manifest (includes schemas).
    self._agent_manifests: dict[str, dict] = {}
async def on_start(self):
    """Run the parent start-up, restore spawned agents, then start the listeners."""
    await super().on_start()
    await self._restore_spawned_agents()

    # Background listeners, started in this order:
    #   1. remote node heartbeats (so we know what's online)
    #   2. agent capability manifests (to build the topic registry)
    for start_listener in (self._node_heartbeat_listener, self._manifest_listener):
        listener_task = asyncio.create_task(start_listener())
        self._tasks.append(listener_task)

    # Inject persisted user facts into system prompt
    self._inject_user_facts_into_prompt()
Called when actor starts. Override for init logic.
def get_notification_urls(self) -> dict:
    """Return persisted notification webhook URLs (discord, telegram, slack, etc.)

    Falls back to an empty dict when nothing truthy is stored under the
    "_notification_urls" key.
    """
    stored = self.recall("_notification_urls")
    if stored:
        return stored
    return {}
Return persisted notification webhook URLs (discord, telegram, slack, etc.)
async def delete_pipeline_rule(self, rule_id: str) -> str:
    """Stop all agents for a rule and remove it from registry.

    Returns a human-readable status line: either a "no rule found" message
    or a summary naming the agents that were running and got stopped.
    """
    all_rules = self.get_pipeline_rules()
    target_rule = all_rules.get(rule_id)
    if not target_rule:
        return f"No rule found with id '{rule_id}'."

    halted = []
    for member in target_rule.get("agents", []):
        # Drop the spawn-registry entry whether or not the agent is running.
        self._remove_from_spawn_registry(member)
        running = self._registry.find_by_name(member) if self._registry else None
        if not running:
            continue
        await running.stop()
        await self._registry.unregister(running.actor_id)
        halted.append(member)

    del all_rules[rule_id]
    self.persist(PIPELINE_RULES_KEY, all_rules)

    preview = target_rule.get("task", "")[:60]
    halted_text = ', '.join(halted) or 'none running'
    return f"Rule '{rule_id}' deleted. Stopped agents: {halted_text}.\nRule was: {preview}"
Stop all agents for a rule and remove it from registry.
async def handle_message(self, msg: Message):
    """Route TASK and RESULT messages; every other message type is ignored.

    TASK: monitor notifications are queued instead of being handed to the
    task handler; any other task goes to ``_handle_task``.
    RESULT: resolves the pending future registered under the payload's
    task id, if there is one and it is not already done.
    """
    if msg.type == MessageType.TASK:
        alert = msg.payload
        # Intercept monitor notifications BEFORE passing to LLM _handle_task
        if isinstance(alert, dict) and alert.get("_monitor_notification"):
            self._pending_notifications.append(alert)
            logger.info(f"[{self.name}] Monitor alert queued: {alert.get('message','')[:80]}")
            return
        await self._handle_task(msg)
        return

    if msg.type != MessageType.RESULT:
        return

    reply = msg.payload
    if not isinstance(reply, dict):
        return

    # Support both key names: "_task_id" (new) and "task" (legacy)
    future_key = reply.get("_task_id") or reply.get("task")
    if not future_key or future_key not in self._result_futures:
        return

    waiter = self._result_futures[future_key]
    if not waiter.done():
        waiter.set_result(reply)
Handle messages not caught by default handlers.
async def chat(self, user_message: str) -> str:
    """Answer via the parent ``chat()`` and extract user facts in the background.

    Args:
        user_message: The user's text, forwarded unchanged to the parent.

    Returns:
        The response produced by the parent ``chat()``.
    """
    response = await super().chat(user_message)

    # Fire-and-forget fact extraction — don't block the response.
    fact_task = asyncio.create_task(self._extract_and_save_facts(user_message, response))

    # The event loop only keeps weak references to tasks, so a task whose
    # handle is dropped can be garbage-collected mid-execution. Hold a
    # strong reference until it completes, then let it go.
    pending = getattr(self, "_fact_tasks", None)
    if pending is None:
        pending = self._fact_tasks = set()
    pending.add(fact_task)
    fact_task.add_done_callback(pending.discard)

    return response
Direct async call - useful for the main conversation actor.
async def chat_stream(self, user_message: str):
    """Stream the parent's ``chat_stream()`` and extract facts from the full reply.

    Every chunk from the parent stream is re-yielded unchanged. Text chunks
    are also accumulated so the complete response can be handed to
    ``_extract_and_save_facts`` in a background task.

    Extraction is scheduled as soon as a dict chunk arrives after some text,
    and again as a fallback when the stream ends. Scheduling before the dict
    is yielded matters because a consumer that stops iterating at the dict
    never resumes this generator, so code after the loop would not run.

    Args:
        user_message: The text to send to the underlying chat implementation.

    Yields:
        The chunks produced by the parent stream (text, and dict chunks).
    """
    text_parts = []
    extraction_scheduled = False

    def _schedule_fact_extraction():
        nonlocal extraction_scheduled
        if extraction_scheduled or not text_parts:
            return
        extraction_scheduled = True
        task = asyncio.create_task(
            self._extract_and_save_facts(user_message, "".join(text_parts))
        )
        # Keep a strong reference: the loop only holds tasks weakly.
        pending = getattr(self, "_fact_extraction_tasks", None)
        if pending is None:
            pending = self._fact_extraction_tasks = set()
        pending.add(task)
        task.add_done_callback(pending.discard)

    async for chunk in super().chat_stream(user_message):
        if isinstance(chunk, dict):
            # NOTE(review): assumes the dict chunk is the final usage summary;
            # confirm against the parent implementation.
            _schedule_fact_extraction()
        else:
            text_parts.append(chunk)
        yield chunk

    # Fallback for streams that end without a dict chunk.
    _schedule_fact_extraction()
Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.
Usage: async for chunk in agent.chat_stream("hello"): if isinstance(chunk, dict): usage = chunk # final usage summary else: print(chunk, end="", flush=True)
async def process_user_input(self, text: str) -> str:
    """Handle one line of user input and return the complete reply.

    Processing order:
      1. Direct intercepts answered without an LLM call: node listing,
         ``/topics``, ``/memory``, ``/webhook``, ``/rules``,
         ``/rules delete <id>``, ``/capabilities``.
      2. Webhook URLs found anywhere in the message are persisted.
      3. ``@agent message`` is routed to a local actor (auto-spawning a
         catalog recipe if needed) or to a remote agent over MQTT.
      4. Explicit planner prefixes, then LLM intent classification
         (PIPELINE -> planner, HA -> home-assistant agent).
      5. Otherwise a normal chat turn, followed by ``<spawn>`` block
         processing and LLM-produced delegations.

    Args:
        text: Raw user input.

    Returns:
        The reply text, prefixed with any drained monitor notifications.
    """
    note_prefix = self._drain_notifications()

    # ── Direct API intercepts — handle without LLM round-trip ──────────
    stripped = text.strip().rstrip("()")
    if stripped in ("main.list_nodes", "list_nodes", "/nodes"):
        nodes = self.list_nodes()
        if not nodes:
            return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>."
        import time as _t
        lines = []
        for nd in sorted(nodes, key=lambda x: x["node"]):
            status = "🟢 online" if nd["online"] else "🔴 offline"
            agents = ", ".join(nd["agents"]) or "(no agents)"
            age = int(_t.time() - nd["last_seen"])
            lines.append(f" {nd['node']:22s} {status} | agents: {agents} | last heartbeat: {age}s ago")
        return note_prefix + "Remote nodes:\n" + "\n".join(lines)

    if stripped.startswith("/topics"):
        keyword = stripped[len("/topics"):].strip().lstrip("(").rstrip(")")
        topics = self.list_topics(keyword)
        if not topics:
            msg = "No topics found" + (f" matching '{keyword}'" if keyword else "") + "."
            msg += " Topics are registered automatically when agents publish for the first time."
            return note_prefix + msg
        lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"]
        for t in topics:
            agent_strs = ", ".join(
                f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "")
                for a in t["agents"]
            )
            lines.append(f" {t['topic']:40s} ← {agent_strs}")
        return note_prefix + "\n".join(lines)

    # ── Memory management ───────────────────────────────────────────────
    if stripped.startswith("/memory"):
        parts = stripped.split(None, 1)
        sub = parts[1].strip() if len(parts) > 1 else ""
        if sub == "clear":
            self.persist("_user_facts", {})
            self.persist("history_summary", "")
            self._history_summary = ""
            self.system_prompt = ORCHESTRATOR_PROMPT
            return note_prefix + "Memory cleared — user facts and conversation summary reset."
        if sub.startswith("forget "):
            key = sub[7:].strip()
            facts = self.get_user_facts()
            if key in facts:
                del facts[key]
                self.persist("_user_facts", facts)
                self._inject_user_facts_into_prompt()
                return note_prefix + f"Forgotten: '{key}'"
            return note_prefix + f"No fact found with key '{key}'."
        # Default: show memory
        facts = self.get_user_facts()
        summary = self._history_summary
        lines = []
        if facts:
            lines.append(f"User facts ({len(facts)}):")
            for k, v in facts.items():
                lines.append(f" {k}: {v}")
        else:
            lines.append("No user facts stored yet.")
        if summary:
            lines.append(f"\nConversation summary:\n {summary[:300]}{'...' if len(summary) > 300 else ''}")
        else:
            lines.append("\nNo conversation summary yet.")
        lines.append("\nCommands: /memory clear | /memory forget <key>")
        return note_prefix + "\n".join(lines)

    # ── Webhook / notification URL management ───────────────────────────
    if stripped.startswith("/webhook"):
        parts = stripped.split(None, 2)
        if len(parts) == 1:
            # /webhook — show stored URLs
            urls = self.recall("_notification_urls") or {}
            if not urls:
                return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url> or /webhook telegram <url>"
            lines = ["Stored notification URLs:"]
            for svc, url in urls.items():
                lines.append(f" {svc}: {url}")
            return note_prefix + "\n".join(lines)
        elif len(parts) >= 3:
            # /webhook discord <url>
            service = parts[1].lower()
            url = parts[2].strip()
            urls = self.recall("_notification_urls") or {}
            urls[service] = url
            self.persist("_notification_urls", urls)
            return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically."
        else:
            return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..."

    # Auto-detect webhook URLs in any message and persist them
    import re as _re
    _webhook_match = _re.search(
        r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
        text
    )
    if _webhook_match:
        url = _webhook_match.group(0).rstrip(".,;!)'\"")
        urls = self.recall("_notification_urls") or {}
        if "discord" in url:
            urls["discord"] = url
        elif "slack" in url:
            urls["slack"] = url
        elif "telegram" in url:
            urls["telegram"] = url
        self.persist("_notification_urls", urls)
        logger.info(f"[{self.name}] Auto-saved webhook URL from message")

    # ── Pipeline rules ──────────────────────────────────────────────────
    if stripped in ("/rules", "rules"):
        rules = self.get_pipeline_rules()
        if not rules:
            return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'."
        import datetime
        lines = [f"Active pipeline rules ({len(rules)}):"]
        for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)):
            agents = rule.get("agents", [])
            task = rule.get("task", "")[:80]
            ts = rule.get("created_at", 0)
            created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown"
            # Check which agents are running
            running_agents = []
            stopped_agents = []
            for a in agents:
                if self._registry and self._registry.find_by_name(a):
                    running_agents.append(a)
                else:
                    stopped_agents.append(a)
            status = "🟢" if running_agents else "🔴"
            lines.append(f"\n{status} [{rule_id}] — {task}")
            lines.append(f" agents : {', '.join(agents)}")
            if stopped_agents:
                lines.append(f" stopped : {', '.join(stopped_agents)}")
            lines.append(f" created : {created}")
        lines.append("\nTo delete a rule: /rules delete <rule_id>")
        return note_prefix + "\n".join(lines)

    if stripped.startswith("/rules delete "):
        rule_id = stripped[len("/rules delete "):].strip()
        result = await self.delete_pipeline_rule(rule_id)
        return note_prefix + result

    if stripped.startswith("/rules "):
        # Unknown /rules sub-command (including "/rules delete" with no id).
        # Previously this fell into the capabilities listing below.
        return note_prefix + "Usage: /rules | /rules delete <rule_id>"

    # ── Agent capabilities ──────────────────────────────────────────────
    # This branch used to be gated on "/rules" with a hard-coded [14:] slice,
    # so "/capabilities" was never matched. Key it on its own command and
    # derive the slice from the prefix length.
    if stripped.startswith("/capabilities"):
        keyword = stripped[len("/capabilities"):].strip().lstrip("(").rstrip(")")
        caps = self.list_capabilities(keyword)
        if not caps:
            msg = "No agents found" + (f" matching '{keyword}'" if keyword else "") + "."
            msg += " Agents publish their capabilities on startup."
            return note_prefix + msg
        lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"]
        for a in caps:
            lines.append("")
            lines.append(" [" + a["name"] + "]" + (" on " + a["node"] if a.get("node") else ""))
            lines.append(" description : " + a["description"])
            if a["capabilities"]:
                lines.append(" capabilities: " + ", ".join(a["capabilities"]))
            if a["input_schema"]:
                lines.append(" input : " + str(a["input_schema"]))
            if a["output_schema"]:
                lines.append(" output : " + str(a["output_schema"]))
        return note_prefix + "\n".join(lines)

    # ── @mention direct routing ─────────────────────────────────────────
    if text.startswith("@"):
        # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?"
        parts = text.split(None, 1)
        target_name = parts[0].lstrip("@").rstrip(":,")
        message = parts[1].strip() if len(parts) > 1 else text

        # Try local registry first
        local_target = self._registry.find_by_name(target_name) if self._registry else None
        if not local_target:
            # Not running — check if it's a spawnable catalog recipe
            manifest = self._agent_manifests.get(target_name, {})
            if manifest.get("spawnable") and manifest.get("catalog"):
                catalog_name = manifest["catalog"]
                catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None
                if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
                    logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...")
                    try:
                        spawn_result = await catalog_actor._action_spawn(target_name, {})
                        if spawn_result and spawn_result.get("ok"):
                            # Give the new actor a moment to register itself.
                            await asyncio.sleep(0.5)
                            local_target = self._registry.find_by_name(target_name) if self._registry else None
                            logger.info(f"[main] '{target_name}' spawned, routing task...")
                        else:
                            err = spawn_result.get("message", "unknown error") if spawn_result else "no response"
                            return note_prefix + f"Could not spawn '{target_name}': {err}"
                    except Exception as e:
                        return note_prefix + f"Could not spawn '{target_name}': {e}"

        if local_target:
            result = await self.delegate_task(target_name, message, timeout=60.0)
            if result:
                reply = result.get("result") or result.get("response") or str(result)
                return note_prefix + f"**{target_name}**: {reply}"
            return note_prefix + f"{target_name} did not respond."

        # Check if it's a known remote agent
        remote_node = None
        for node_name, nd in self._known_nodes.items():
            if target_name in nd.get("agents", []):
                remote_node = node_name
                break

        if remote_node:
            # Send via MQTT and wait for reply
            reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}"
            future: asyncio.Future = asyncio.get_running_loop().create_future()
            self._result_futures[reply_topic] = future

            # Subscribe briefly for the reply
            async def _wait_reply():
                try:
                    import aiomqtt
                    async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
                        await client.subscribe(reply_topic)
                        async for msg in client.messages:
                            try:
                                data = json.loads(msg.payload.decode())
                                if not future.done():
                                    future.set_result(data)
                            except Exception:
                                # Undecodable reply: leave the future pending
                                # so the caller reports a timeout.
                                pass
                            return
                except Exception as e:
                    if not future.done():
                        future.set_exception(e)

            reply_task = None
            try:
                await self._mqtt_publish(
                    f"agents/by-name/{target_name}/task",
                    {"text": message, "_reply_topic": reply_topic,
                     "_remote_task": True, "payload": message},
                )
                reply_task = asyncio.create_task(_wait_reply())
                result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0)
                # The reply is arbitrary JSON; only dicts carry result keys.
                if isinstance(result, dict):
                    reply = result.get("result") or result.get("response") or str(result)
                else:
                    reply = str(result)
                return note_prefix + f"**{target_name}** (on {remote_node}): {reply}"
            except asyncio.TimeoutError:
                return note_prefix + f"{target_name} on {remote_node} did not respond within 30s."
            except Exception as e:
                # Publish or listener failure (e.g. broker unreachable):
                # report it instead of raising out of the input handler.
                return note_prefix + f"Could not reach '{target_name}' on {remote_node}: {e}"
            finally:
                if reply_task is not None:
                    reply_task.cancel()
                self._result_futures.pop(reply_topic, None)

        # Not found locally or remotely
        known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])]
        if known_remote:
            return note_prefix + (f"Agent '{target_name}' not found. "
                                  f"Remote agents: {', '.join(known_remote)}")
        return note_prefix + f"Agent '{target_name}' not found."

    # Explicit planner prefix always wins
    lowered = text.lower()
    if any(lowered.startswith(p) for p in (
        "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
        "@planner", "set up a pipeline", "create a rule", "set up a rule",
    )):
        result = await self._run_planner(text)
        return note_prefix + (result or "Planner did not return a result. Please retry.")

    # Single LLM call classifies intent: HA (direct action), PIPELINE (reactive rule), OTHER
    intent = await self._classify_intent(text)
    logger.info(f"[{self.name}] Intent: {intent} — {text[:60]}")

    if intent == "PIPELINE":
        result = await self._run_planner(text)
        return note_prefix + (result or "Planner did not return a result. Please retry.")

    if intent == "HA":
        result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
        if result and isinstance(result, dict) and result.get("result"):
            return note_prefix + str(result["result"])
        if not result:
            return note_prefix + "I could not reach the Home Assistant agent right now. Please retry."
        return note_prefix + "The Home Assistant agent did not return a result. Please retry."

    response = await self.chat(text)

    # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once
    has_spawn = "<spawn>" in response
    has_code = "async def handle_task" in response or "async def setup" in response
    asked_spawn = any(w in lowered for w in ("spawn", "create", "make", "build", "add", "agent"))
    if has_code and not has_spawn and asked_spawn:
        logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it")
        response = await self.chat(
            "You wrote agent code but forgot to wrap it in a <spawn> block. "
            "Please output the complete spawn block now with that exact code inside it. "
            "Output ONLY the <spawn>...</spawn> block, nothing else."
        )

    clean, spawned = await self._process_spawn_commands(response)

    # Execute any @agent-name {payload} delegation patterns the LLM produced
    clean = await self._execute_llm_delegations(clean)

    await self._mqtt_publish(
        f"agents/{self.actor_id}/logs",
        {"type": "user_interaction", "input": text[:100], "response": clean[:200]},
    )

    if spawned:
        bg_names = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
        live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)]
        parts = []
        if live_names:
            replaced = '"replace": true' in response or '"replace":true' in response
            action = "Replaced" if replaced else "Spawned"
            parts.append(f"{action} {', '.join(live_names)}")
        if bg_names:
            parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
        if parts:
            clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]"

    return note_prefix + clean
async def process_user_input_stream(self, text: str):
    """
    Streaming version of process_user_input().
    Yields text chunks as the LLM generates them, then a final dict:
    {"done": True, "spawned": [...], "system_msg": "..."}

    Slash-commands, direct API intercepts and @mentions are not streamed:
    they are delegated to process_user_input() and yielded as one chunk.
    Planner and Home Assistant results are likewise yielded whole.

    The CLI calls this and prints chunks immediately.
    REST/Discord/WhatsApp should use process_user_input() instead.
    """
    # Drain monitor notifications first
    note_prefix = self._drain_notifications()
    if note_prefix:
        yield note_prefix

    # All slash-commands and direct API intercepts are handled by process_user_input
    # Route them there to avoid duplicating all that logic here
    _stripped = text.strip().rstrip("()")
    _is_command = (
        _stripped.startswith("/")
        or _stripped in ("list_nodes", "main.list_nodes", "rules")
        or _stripped.startswith("@")
    )
    if _is_command:
        result = await self.process_user_input(text)
        yield result
        yield {"done": True, "spawned": [], "system_msg": ""}
        return

    # Explicit planner prefix always wins
    _lowered = text.lower()
    if any(_lowered.startswith(p) for p in (
        "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
        "@planner", "set up a pipeline", "create a rule", "set up a rule",
    )):
        result = await self._run_planner(text)
        yield result or "Planner did not return a result. Please retry."
        yield {"done": True, "spawned": [], "system_msg": ""}
        return

    # Single LLM call classifies intent: HA, PIPELINE, or OTHER
    intent = await self._classify_intent(text)
    logger.info(f"[{self.name}] Intent: {intent} — {text[:60]}")

    if intent == "PIPELINE":
        result = await self._run_planner(text)
        yield result or "Planner did not return a result. Please retry."
        yield {"done": True, "spawned": [], "system_msg": ""}
        return

    if intent == "HA":
        result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
        if result and isinstance(result, dict) and result.get("result"):
            yield str(result["result"])
        elif not result:
            yield "I could not reach the Home Assistant agent right now. Please retry."
        else:
            yield "The Home Assistant agent did not return a result. Please retry."
        yield {"done": True, "spawned": [], "system_msg": ""}
        return

    # Stream the LLM response chunk by chunk
    full_chunks = []
    async for chunk in self.chat_stream(text):
        if isinstance(chunk, dict):
            # Usage dict — discarded here. Keep iterating instead of breaking:
            # a break leaves chat_stream() suspended at its yield, so any
            # work it does after the stream ends would never run.
            continue
        full_chunks.append(chunk)
        yield chunk

    full_response = "".join(full_chunks)

    # Process any <spawn> blocks in the completed response
    _, spawned = await self._process_spawn_commands(full_response)

    # Execute any @agent-name {payload} delegation patterns the LLM produced
    # If delegations ran, yield the results as an additional chunk
    delegated = await self._execute_llm_delegations(full_response)
    if delegated != full_response:
        # Find what changed and yield just the new parts
        import re as _re
        results = _re.findall(r'[✅❌]\s+\S+.*', delegated)
        if results:
            yield "\n" + "\n".join(results)
        full_response = delegated

    system_msg = ""
    if spawned:
        names = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder))
        bg_names = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
        parts = []
        if names:
            replaced = '"replace": true' in full_response or '"replace":true' in full_response
            parts.append(f"{'Replaced' if replaced else 'Spawned'} {names} — will auto-restore on restart")
        if bg_names:
            parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
        system_msg = " | ".join(parts)

    await self._mqtt_publish(
        f"agents/{self.actor_id}/logs",
        {"type": "user_interaction", "input": text[:100], "response": full_response[:200]},
    )

    yield {"done": True, "spawned": spawned, "system_msg": system_msg}
Streaming version of process_user_input(). Yields text chunks as the LLM generates them, then a final dict: {"done": True, "spawned": [...names...], "system_msg": "..."}
The CLI calls this and prints chunks immediately. REST/Discord/WhatsApp should use process_user_input() instead.
async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
    """
    Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline.
    Returns the final synthesised result without blocking main's context.

    Args:
        goal: Natural-language goal handed to the TaskManager.
        agents: Names of the agents the TaskManager may use.
        timeout: Seconds to wait for the pipeline result.
        force_replan: Forwarded to the TaskManager as ``force_replan``.

    Returns:
        The payload the pipeline reports back, or
        ``{"error": "Pipeline timed out after <timeout>s"}`` on timeout.

    Usage:
        result = await main.run_pipeline(
            goal="Find the Philips EP2220 manual and answer: how do I descale it?",
            agents=["manual-agent", "installer"]
        )
    """
    from .task_manager import TaskManager
    import uuid

    task_id = uuid.uuid4().hex[:8]
    future = asyncio.get_running_loop().create_future()
    self._result_futures[task_id] = future

    # spawn() sits inside the try so a failed spawn does not leave a
    # never-resolved future registered under task_id.
    try:
        mgr = await self.spawn(
            TaskManager,
            goal=goal,
            available_agents=agents,
            llm_provider=self.llm,
            reply_to_id=self.actor_id,
            reply_task_id=task_id,
            auto_destroy=True,
            force_replan=force_replan,
            cache_dir=str(self._persistence_dir.parent / "plan_cache"),
            persistence_dir=str(self._persistence_dir.parent),
        )

        logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}")

        return await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError:
        logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s")
        return {"error": f"Pipeline timed out after {timeout}s"}
    finally:
        self._result_futures.pop(task_id, None)
Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline. Returns the final synthesised result without blocking main's context.
Usage: result = await main.run_pipeline( goal="Find the Philips EP2220 manual and answer: how do I descale it?", agents=["manual-agent", "installer"] )
def list_nodes(self) -> list[dict]:
    """Summarise every entry in ``_known_nodes``.

    Returns:
        One dict per node with keys ``node``, ``agents``, ``last_seen`` and
        ``online``; a node is online when it was last seen under 30 seconds ago.
    """
    import time as _time

    current = _time.time()
    snapshot = []
    for node_name, record in self._known_nodes.items():
        seen_at = record.get("last_seen", 0)
        snapshot.append({
            "node": node_name,
            "agents": record.get("agents", []),
            "last_seen": seen_at,
            "online": (current - seen_at) < 30,
        })
    return snapshot
Return all known remote nodes with their last-seen time and running agents.
def list_topics(self, keyword: str = "") -> list[dict]:
    """
    List the topics recorded in ``_topic_registry``, sorted by topic name.

    Args:
        keyword: Optional case-insensitive substring; only topics containing
            it are returned. An empty keyword returns every topic.

    Returns:
        Entries shaped as
        {"topic": str, "agents": [{"name", "node", "description"}, ...]}

    Example:
        list_topics("cpu") → topics containing "cpu"
        list_topics() → all topics
    """
    needle = keyword.lower()

    def _publisher(manifest):
        return {
            "name": manifest.get("name"),
            "node": manifest.get("node"),
            "description": manifest.get("description", ""),
        }

    matches = [
        {"topic": topic, "agents": [_publisher(m) for m in manifests]}
        for topic, manifests in self._topic_registry.items()
        if not needle or needle in topic.lower()
    ]
    matches.sort(key=lambda entry: entry["topic"])
    return matches
Return all known MQTT topics published by agents, optionally filtered by keyword. Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}
Example: list_topics("cpu") → topics containing "cpu" list_topics("temp") → topics containing "temp" list_topics() → all topics
def list_capabilities(self, keyword: str = "") -> list[dict]:
    """
    Describe every agent in ``_agent_manifests``, sorted by name.

    Args:
        keyword: Optional filter. It is lower-cased and split on whitespace;
            an agent matches when ANY word occurs in its description,
            capabilities or name.

    Returns:
        Dicts with keys name, node, description, capabilities, input_schema,
        output_schema, spawnable and running (True when the registry
        currently finds an actor with that name).

    Example:
        list_capabilities() → all agents
        list_capabilities("weather") → agents mentioning "weather"
    """
    words = keyword.lower().strip().split()
    profiles = []

    for agent_name, manifest in self._agent_manifests.items():
        description = manifest.get("description", "")
        capability_list = manifest.get("capabilities", [])

        if words:
            searchable = " ".join(
                (description.lower(), " ".join(capability_list).lower(), agent_name.lower())
            )
            matched = False
            for word in words:
                if word in searchable:
                    matched = True
                    break
            if not matched:
                continue

        is_running = bool(self._registry and self._registry.find_by_name(agent_name))
        profiles.append({
            "name": agent_name,
            "node": manifest.get("node"),
            "description": description,
            "capabilities": capability_list,
            "input_schema": manifest.get("input_schema", {}),
            "output_schema": manifest.get("output_schema", {}),
            "spawnable": manifest.get("spawnable", False),
            "running": is_running,
        })

    profiles.sort(key=lambda profile: profile["name"])
    return profiles
Return all known agents with their full capability profile: name, description, capabilities, input_schema, output_schema.
Example: list_capabilities() → all agents list_capabilities("weather") → agents with "weather" in description/capabilities
async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
    """
    Move a running agent to a different node.

    If the agent is local: the local instance is unregistered and stopped,
    then it is re-spawned on ``target_node`` via ``_spawn_remote``.
    If the agent is remote: a migrate command is published to its current node.
    In both cases the spawn-registry entry is updated to the new node.

    Args:
        agent_name: Name of the agent as stored in the spawn registry.
        target_node: Node the agent should run on afterwards.

    Returns:
        {"success": bool, "message": str}
    """
    reg = self._get_spawn_registry()
    config = reg.get(agent_name)
    if not config:
        return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."}

    # A stored "node" of None (e.g. a JSON null) means local, like a missing key.
    current_node = (config.get("node") or "").strip()

    if current_node == target_node:
        return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."}

    if current_node:
        # ── Remote → Remote migration ────────────────────────────────────
        logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'")
        await self._mqtt_publish(
            f"nodes/{current_node}/migrate",
            {"name": agent_name, "target_node": target_node},
        )
    else:
        # ── Local → Remote migration ─────────────────────────────────────
        logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'")

        # Stop the local instance
        if self._registry:
            local = self._registry.find_by_name(agent_name)
            if local:
                try:
                    await self._registry.unregister(local.actor_id)
                    await local.stop()
                    await asyncio.sleep(0.3)
                except Exception as e:
                    # Best effort: a failed local stop does not abort the migration.
                    logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}")

        # Update config with new node target and re-spawn remotely
        new_config = dict(config)
        new_config["node"] = target_node
        new_config.pop("replace", None)

        await self._spawn_remote(new_config, target_node, save=True)

    # Update spawn registry so next restart re-spawns to the right node
    updated = dict(config)
    updated["node"] = target_node
    self._save_to_spawn_registry(updated)

    msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' "
           f"→ '{target_node}'. It will appear in the dashboard shortly.")
    logger.info(f"[{self.name}] {msg}")
    return {"success": True, "message": msg}
Move a running agent to a different node.
If the agent is local: saves updated config (with new node) and re-spawns remotely. If the agent is remote: publishes a migrate command to its current node. Returns {"success": bool, "message": str}
async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
    """
    Send a task to the installer agent and wait for the result.
    Handles node_deploy, node_install, node_run, install, check actions.
    timeout is generous (300s) because deploys involve SSH + pip installs.

    Args:
        payload: Task payload. It is copied before the correlation keys
            ``_task_id`` and ``task`` are added, so the caller's dict is
            left untouched.
        timeout: Seconds to wait for the installer's reply.

    Returns:
        The installer's result payload, or an ``{"error": ...}`` dict when
        there is no registry, no actor named "installer", or the wait times out.
    """
    if not self._registry:
        return {"error": "No registry available"}
    installer = self._registry.find_by_name("installer")
    if not installer:
        return {"error": "installer agent not found"}

    import uuid as _uuid
    task_id = f"inst_{_uuid.uuid4().hex[:8]}"
    future: asyncio.Future = asyncio.get_running_loop().create_future()
    self._result_futures[task_id] = future

    payload = dict(payload)
    payload["_task_id"] = task_id
    payload["task"] = task_id

    # send() is inside the try so a failed send still removes the pending
    # future instead of leaving it registered.
    try:
        await self.send(installer.actor_id, MessageType.TASK, payload)
        return await asyncio.wait_for(future, timeout=timeout)
    except asyncio.TimeoutError:
        return {"error": f"Installer timed out after {timeout}s"}
    finally:
        self._result_futures.pop(task_id, None)
Send a task to the installer agent and wait for the result. Handles node_deploy, node_install, node_run, install, check actions. timeout is generous (300s) because deploys involve SSH + pip installs.
316 async def _delegate_task_with_normalized_key(self, target_name: str, task: Any, timeout: float = 60.0): 317 if not self._registry: 318 return None 319 target = self._registry.find_by_name(target_name) 320 if not target: 321 return None 322 323 task_key = _normalize_delegate_task_key(task) 324 future = asyncio.get_event_loop().create_future() 325 self._result_futures[task_key] = future 326 await self.send( 327 target.actor_id, 328 MessageType.TASK, 329 {"text": task, "task": task_key, "reply_to": self.actor_id}, 330 ) 331 try: 332 return await asyncio.wait_for(future, timeout=timeout) 333 except asyncio.TimeoutError: 334 return None 335 finally: 336 self._result_futures.pop(task_key, None)
Delegate a task to a named local actor and wait for its reply, with the pending future keyed by the normalized task.
async def delete_spawned_agent(self, name: str):
    """Remove a spawned agent from the spawn registry and stop it.

    For an agent recorded on a remote node, the node's desired state is
    updated and a stop command is published to that node. Any local actor
    with the same name is unregistered and stopped.

    Args:
        name: Name of the agent as stored in the spawn registry.
    """
    # Find node before removing from registry.
    # A stored "node" of None (e.g. a JSON null) means local, like a missing key.
    reg = self._get_spawn_registry()
    node = (reg.get(name, {}).get("node") or "").strip()

    self._remove_from_spawn_registry(name)

    # Update desired state so Pi doesn't re-spawn on reconcile
    if node:
        await self._update_node_desired_state(node, remove_name=name)
        await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1)

    if self._registry:
        target = self._registry.find_by_name(name)
        if target:
            await self._registry.unregister(target.actor_id)
            await target.stop()
class MonitorActor(Actor):
    """
    Watchdog actor that tracks liveness and error reports of the other actors.

    On a fixed interval it pings every registered actor, compares the time each
    one was last seen against ``heartbeat_timeout``, publishes alerts and a
    system-health snapshot over MQTT, and forwards user-relevant problems to
    the actor registered under the name ``"main"``.
    """

    def __init__(
        self,
        check_interval: float = 15.0,
        heartbeat_timeout: float = 60.0,
        auto_restart: bool = False,
        **kwargs,
    ):
        """
        Args:
            check_interval: Seconds to sleep between monitoring passes.
            heartbeat_timeout: Seconds of silence after which a running actor
                is reported as unresponsive.
            auto_restart: When True, an actor that times out is restarted.
            **kwargs: Forwarded to ``Actor.__init__``; ``name`` defaults to
                ``"monitor"``.
        """
        kwargs.setdefault("name", "monitor")
        super().__init__(**kwargs)
        self.check_interval = check_interval
        self.heartbeat_timeout = heartbeat_timeout
        self.auto_restart = auto_restart
        self.protected = True

        # actor_id → timestamp of the most recent sign of life
        self._last_seen: dict[str, float] = {}
        # actor_id → True while a heartbeat alert is active for that actor
        self._alert_state: dict[str, bool] = {}

        # Error event registry: actor_id → latest error event dict
        self._error_registry: dict[str, dict] = {}
        # Cooldown: actor_id → last time we notified main about it
        self._last_notified: dict[str, float] = {}
        # Track which actors we've attempted to restart this session
        self._restart_attempts: dict[str, int] = {}

    async def on_start(self):
        """Seed the last-seen table with every known actor and start the monitor loop."""
        if self._registry:
            now = time.time()
            for actor in self._registry.all_actors():
                if actor.actor_id != self.actor_id:
                    self._last_seen[actor.actor_id] = now

        self._tasks.append(asyncio.create_task(self._monitor_loop()))
        logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")

    # ── Message handling ───────────────────────────────────────────────────

    async def handle_message(self, msg: Message):
        """Treat any incoming message as a heartbeat and dispatch structured error events."""
        # Heartbeat — any message counts as alive
        if msg.sender_id and msg.sender_id != self.actor_id:
            self._last_seen[msg.sender_id] = time.time()
            if self._alert_state.get(msg.sender_id):
                logger.info(f"[{self.name}] Actor {msg.sender_id[:8]} recovered.")
                self._alert_state[msg.sender_id] = False

        # Structured error event from agents/{id}/errors (routed via MQTT bridge)
        if msg.type == MessageType.TASK and isinstance(msg.payload, dict):
            if msg.payload.get("_monitor_error_event"):
                await self._handle_error_event(msg.payload)

    # ── Monitor loop ───────────────────────────────────────────────────────

    async def _monitor_loop(self):
        """Run ping / liveness / error-registry / health passes until the actor stops."""
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                await asyncio.sleep(self.check_interval)
                await self._ping_all_actors()
                await self._check_all_actors()
                await self._check_error_registry()
                await self._publish_system_health()
            except asyncio.CancelledError:
                break
            except Exception as e:
                # Keep the watchdog alive: one failed pass must not end monitoring.
                logger.error(f"[{self.name}] Monitor loop error: {e}")

    async def _ping_all_actors(self):
        """Send a STATUS_REQUEST to every other registered actor (best effort)."""
        if not self._registry:
            return
        for actor in self._registry.all_actors():
            if actor.actor_id == self.actor_id:
                continue
            try:
                await self.send(actor.actor_id, MessageType.STATUS_REQUEST, None)
            except Exception as e:
                # Best effort: a failed ping is not an error by itself — the
                # heartbeat timeout will flag the actor if it stays silent.
                logger.debug(f"[{self.name}] Ping to '{actor.name}' failed: {e}")

    async def _check_all_actors(self):
        """Raise or clear heartbeat alerts based on how long each actor has been silent."""
        if not self._registry:
            return
        now = time.time()
        for actor in self._registry.all_actors():
            actor_id = actor.actor_id
            if actor_id == self.actor_id:
                continue
            if actor_id not in self._last_seen:
                # First sighting: start the clock now instead of alerting.
                self._last_seen[actor_id] = now
                continue
            if actor.state == ActorState.RUNNING:
                start_age = now - (actor.metrics.start_time or now)
                if start_age < self.heartbeat_timeout:
                    # Recently started actors count as seen at their start time.
                    self._last_seen[actor_id] = max(
                        self._last_seen[actor_id], now - start_age
                    )
                # Heartbeat fires every 10s — use as secondary liveness signal
                hb = getattr(actor.metrics, "last_heartbeat", None)
                if hb and hb > self._last_seen.get(actor_id, 0):
                    self._last_seen[actor_id] = hb

            gap = now - self._last_seen[actor_id]
            if gap > self.heartbeat_timeout and actor.state == ActorState.RUNNING:
                if not self._alert_state.get(actor_id):
                    self._alert_state[actor_id] = True
                    await self._fire_heartbeat_alert(actor, gap)
                    if self.auto_restart:
                        await self._attempt_restart(actor, reason="heartbeat timeout")
            else:
                if self._alert_state.get(actor_id) and gap <= self.heartbeat_timeout:
                    self._alert_state[actor_id] = False

    # ── Error event handling ───────────────────────────────────────────────

    async def _handle_error_event(self, event: dict):
        """
        Called when an agent publishes a structured error.
        Decides: log / restart / escalate to user.

        Args:
            event: Error event dict; the keys read here are ``actor_id``,
                ``name``, ``phase``, ``error``, ``severity``, ``fatal``,
                ``degraded`` and ``consecutive``.
        """
        actor_id = event.get("actor_id", "")
        name = event.get("name", actor_id[:8])
        phase = event.get("phase", "unknown")
        error = event.get("error", "")
        severity = event.get("severity", "warning")
        fatal = event.get("fatal", False)
        degraded = event.get("degraded", False)
        consec = event.get("consecutive", 1)

        # Store in registry for health checks
        self._error_registry[actor_id] = event

        logger.warning(
            f"[{self.name}] Error event from '{name}': "
            f"phase={phase} severity={severity} consecutive={consec}"
        )

        # ── Recovery decision ──────────────────────────────────────────────
        if fatal:
            # Bad code / setup failure — restart won't help without a fix
            msg = (
                f"**{name}** failed during *{phase}* and cannot run: `{error}`. "
                f"The agent needs its code fixed before it can be used."
            )
            await self._notify_main(actor_id, name, msg, severity="critical")
            await self._fire_error_alert(event)

        elif severity == "critical" or degraded:
            # Repeated runtime errors — try a restart
            actor = self._find_actor(actor_id)
            attempts = self._restart_attempts.get(actor_id, 0)
            if actor and attempts < 3:
                attempts += 1
                self._restart_attempts[actor_id] = attempts
                restarted = await self._attempt_restart(
                    actor, reason=f"{phase} error (attempt {attempts})"
                )
                if restarted:
                    msg = (
                        f"**{name}** kept crashing in *{phase}* ({consec}x), "
                        f"so I restarted it. Latest error: `{error}`."
                    )
                else:
                    msg = (
                        f"**{name}** is crashing repeatedly in *{phase}* "
                        f"and I couldn't restart it. Error: `{error}`."
                    )
            else:
                msg = (
                    f"**{name}** has failed {consec} times in *{phase}* "
                    f"(restart attempted {attempts}x). Error: `{error}`. "
                    f"It may need its code fixed."
                )
            await self._notify_main(actor_id, name, msg, severity="critical")
            await self._fire_error_alert(event)

        else:
            # Single warning — log and let agent recover on its own
            await self._fire_error_alert(event)

    async def _check_error_registry(self):
        """
        Drop degraded agents from the error registry once they have recovered.

        An entry is considered recovered when the actor exposes
        ``_consecutive_errors`` and it is back to 0; main is then told about
        the recovery. Entries are only examined once the notification
        cooldown for that actor has elapsed.
        """
        now = time.time()
        # Iterate over a copy because recovered entries are deleted in the loop.
        for actor_id, event in list(self._error_registry.items()):
            last = self._last_notified.get(actor_id, 0)
            if event.get("degraded") and (now - last) > _NOTIFY_COOLDOWN:
                actor = self._find_actor(actor_id)
                name = event.get("name", actor_id[:8])
                # If agent has recovered (error count reset), clean up registry
                if actor and hasattr(actor, "_consecutive_errors") and actor._consecutive_errors == 0:
                    del self._error_registry[actor_id]
                    await self._notify_main(
                        actor_id, name,
                        f"**{name}** has recovered and is running normally again. ✅",
                        severity="info",
                    )

    # ── User notification ──────────────────────────────────────────────────

    async def _notify_main(
        self,
        actor_id: str,
        agent_name: str,
        message: str,
        severity: str = "warning",
    ):
        """
        Send a structured notification to the actor named ``"main"``.

        Notifications about the same actor are rate-limited by
        ``_NOTIFY_COOLDOWN``; ``severity="info"`` bypasses the limit.

        Args:
            actor_id: Id of the actor the notification is about (cooldown key).
            agent_name: Human-readable name placed in the payload.
            message: Text to relay.
            severity: Severity label placed in the payload.
        """
        now = time.time()
        last_sent = self._last_notified.get(actor_id, 0)
        if severity != "info" and (now - last_sent) < _NOTIFY_COOLDOWN:
            return  # Don't spam

        if not self._registry:
            return
        main = self._registry.find_by_name("main")
        if not main:
            return

        try:
            await self.send(main.actor_id, MessageType.TASK, {
                "_monitor_notification": True,
                "agent_name": agent_name,
                "message": message,
                "severity": severity,
                "timestamp": now,
            })
        except Exception as e:
            logger.error(f"[{self.name}] Failed to notify main: {e}")
            return

        # Start the cooldown only once the notification was actually handed
        # over; otherwise an undeliverable message would silence later ones.
        self._last_notified[actor_id] = now
        logger.info(f"[{self.name}] Notified main about '{agent_name}': {message[:80]}")

    # ── Alerting ───────────────────────────────────────────────────────────

    async def _fire_heartbeat_alert(self, actor: Actor, gap: float):
        """Publish an unresponsive-actor alert and notify main for non-infrastructure actors."""
        alert = {
            "actor_id": actor.actor_id,
            "name": actor.name,
            "last_seen_ago": gap,
            "state": actor.state.value,
            "timestamp": time.time(),
            "severity": "warning" if gap < 120 else "critical",
        }
        logger.warning(f"[{self.name}] ALERT: {actor.name} unresponsive for {gap:.0f}s")
        await self._mqtt_publish(f"agents/{actor.actor_id}/alert", alert)

        # Notify main only for user-spawned agents
        _infra = {"monitor", "installer", "main", "code-agent",
                  "anomaly-detector", "home-assistant-agent"}
        if actor.name not in _infra:
            await self._notify_main(
                actor.actor_id,
                actor.name,
                f"**{actor.name}** has been unresponsive for {gap:.0f}s.",
                severity="warning",
            )

    async def _fire_error_alert(self, event: dict):
        """Publish an error event on the reporting actor's MQTT alert topic."""
        await self._mqtt_publish(
            f"agents/{event.get('actor_id', 'unknown')}/alert",
            {
                "actor_id": event.get("actor_id"),
                "name": event.get("name"),
                "message": f"[{event.get('phase')}] {event.get('error')}",
                "severity": event.get("severity", "warning"),
                "timestamp": time.time(),
            },
        )

    # ── Restart ────────────────────────────────────────────────────────────

    async def _attempt_restart(self, actor: Actor, reason: str = "") -> bool:
        """
        Stop (if needed) and start *actor* again.

        Returns:
            True when the restart completed, False when any step raised.
        """
        logger.info(f"[{self.name}] Restarting '{actor.name}' — reason: {reason}")
        try:
            if actor.state != ActorState.STOPPED:
                await actor.stop()
            await asyncio.sleep(0.5)
            await actor.start()
            # A fresh start counts as a sign of life.
            self._last_seen[actor.actor_id] = time.time()
            logger.info(f"[{self.name}] '{actor.name}' restarted successfully.")
            return True
        except Exception as e:
            logger.error(f"[{self.name}] Restart of '{actor.name}' failed: {e}")
            return False

    # ── Helpers ────────────────────────────────────────────────────────────

    def _find_actor(self, actor_id: str) -> Optional[Actor]:
        """Return the registered actor with *actor_id*, or None if absent or no registry."""
        if not self._registry:
            return None
        for a in self._registry.all_actors():
            if a.actor_id == actor_id:
                return a
        return None

    async def _publish_system_health(self):
        """Publish a snapshot of actor states and error counters to ``system/health``."""
        if not self._registry:
            return
        now = time.time()
        actors = self._registry.all_actors()
        health = {
            "timestamp": now,
            "total_actors": len(actors),
            "running": sum(1 for a in actors if a.state == ActorState.RUNNING),
            "stopped": sum(1 for a in actors if a.state == ActorState.STOPPED),
            "failed": sum(1 for a in actors if a.state == ActorState.FAILED),
            "degraded": len(self._error_registry),
            "actors": [
                {
                    "id": a.actor_id,
                    "name": a.name,
                    "state": a.state.value,
                    "last_seen_ago": now - self._last_seen.get(a.actor_id, now),
                    "consecutive_errors": getattr(a, "_consecutive_errors", 0),
                    "error_phase": getattr(a, "_error_phase", ""),
                }
                for a in actors
            ],
        }
        await self._mqtt_publish("system/health", health)
Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.
def __init__(
    self,
    check_interval: float = 15.0,
    heartbeat_timeout: float = 60.0,
    auto_restart: bool = False,
    **kwargs,
):
    """
    Configure the monitor and create its empty bookkeeping tables.

    The actor name defaults to "monitor" unless the caller supplies one, and
    the monitor marks itself as protected.
    """
    if "name" not in kwargs:
        kwargs["name"] = "monitor"
    super().__init__(**kwargs)

    # Monitoring configuration
    self.auto_restart = auto_restart
    self.heartbeat_timeout = heartbeat_timeout
    self.check_interval = check_interval
    self.protected = True

    # Liveness bookkeeping, keyed by actor_id
    self._alert_state: dict[str, bool] = {}
    self._last_seen: dict[str, float] = {}

    # Track which actors we've attempted to restart this session
    self._restart_attempts: dict[str, int] = {}
    # Cooldown: actor_id → last time we notified main about it
    self._last_notified: dict[str, float] = {}
    # Error event registry: actor_id → latest error event dict
    self._error_registry: dict[str, dict] = {}
async def on_start(self):
    """Mark every other registered actor as seen now, then launch the monitor loop."""
    registry = self._registry
    if registry:
        started_at = time.time()
        self._last_seen.update(
            (peer.actor_id, started_at)
            for peer in registry.all_actors()
            if peer.actor_id != self.actor_id
        )

    loop_task = asyncio.create_task(self._monitor_loop())
    self._tasks.append(loop_task)
    logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")
Called when actor starts. Override for init logic.
async def handle_message(self, msg: Message):
    """Record the sender as alive and forward structured error events."""
    sender = msg.sender_id
    # Heartbeat — any message from another actor counts as a sign of life
    if sender and sender != self.actor_id:
        self._last_seen[sender] = time.time()
        was_alerting = self._alert_state.get(sender)
        if was_alerting:
            logger.info(f"[{self.name}] Actor {sender[:8]} recovered.")
            self._alert_state[sender] = False

    # Structured error event from agents/{id}/errors (routed via MQTT bridge)
    payload = msg.payload
    is_error_event = (
        msg.type == MessageType.TASK
        and isinstance(payload, dict)
        and payload.get("_monitor_error_event")
    )
    if is_error_event:
        await self._handle_error_event(payload)
Handle messages not caught by default handlers.
class ManualAgent(Actor):
    """
    Pre-defined agent that finds, downloads, and answers questions from device manuals.
    Requires: httpx (+ pdfplumber or pymupdf for PDF extraction)

    A manual is located through several search passes, downloaded as PDF,
    converted to text and kept in memory. Questions are answered by handing
    the best keyword-matching chunks of that text to the configured LLM.
    """

    def __init__(self, llm_provider=None, **kwargs):
        """
        Args:
            llm_provider: Object used by ``_ask``; its ``complete`` coroutine
                is called when present.
            **kwargs: Forwarded to ``Actor.__init__``; ``name`` defaults to
                ``"manual-agent"``.
        """
        kwargs.setdefault("name", "manual-agent")
        super().__init__(**kwargs)
        self.llm = llm_provider
        # State of the currently loaded manual (all reset by the "clear" action)
        self._manual_text: Optional[str] = None
        self._manual_device: Optional[str] = None
        self._manual_url: Optional[str] = None
        self._manual_pages: int = 0

    def _current_task_description(self) -> str:
        """Return a short description of what the agent currently holds."""
        if self._manual_device:
            return f"loaded: {self._manual_device}"
        return "idle — no manual loaded"

    async def on_start(self):
        """Announce readiness on the agent's MQTT log topic."""
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.", "timestamp": time.time()},
        )
        logger.info(f"[{self.name}] Ready.")

    # ── Direct chat() entry point (used by CLIInterface) ───────────────────

    async def chat(self, message: str) -> str:
        """
        Synchronous-style entry point for CLIInterface and other direct callers.
        Parses the message as JSON payload or plain-text question, executes the
        action, and returns a human-readable string response.
        """
        payload = None
        stripped = message.strip()
        if stripped.startswith("{"):
            try:
                payload = json.loads(stripped)
            except json.JSONDecodeError:
                pass  # Not JSON after all — treat it as a plain-text question

        if payload and isinstance(payload, dict):
            result = await self._handle_task_payload(payload)
        elif self._manual_text:
            result = await self._ask(stripped)
        else:
            result = {
                "error": "No manual loaded yet.",
                "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
            }

        return self._format_result(result)

    @staticmethod
    def _format_count(value) -> str:
        """Render *value* with thousands separators when it is a number, else as plain text."""
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return f"{value:,}"
        return str(value)

    def _format_result(self, result: dict) -> str:
        """Turn a result dict into a readable string for chat output."""
        if "error" in result:
            msg = result["error"]
            hint = result.get("hint", "")
            return f"[error] {msg}\n{hint}".strip()

        if "answer" in result:
            return result["answer"]

        # The ',' format spec is only valid for numbers, so the '?' fallback
        # has to be rendered through _format_count instead of inline.
        chars = self._format_count(result.get("chars", "?"))

        if result.get("success"):
            return (
                f"Manual loaded: {result.get('device', '?')}\n"
                f" URL: {result.get('url', '?')}\n"
                f" Pages: {result.get('pages', '?')}\n"
                f" Chars: {chars}\n"
                f" Preview: {result.get('preview', '')[:200]}"
            )

        if "status" in result:
            if result["status"] == "cleared":
                return "Manual cleared."
            if result["status"] == "loaded":
                return (
                    f"Loaded: {result.get('device', '?')} "
                    f"({result.get('pages', '?')} pages, {chars} chars)"
                )
            return result.get("message", str(result))

        return str(result)

    # ── Message-based entry point (actor mailbox) ──────────────────────────

    async def handle_message(self, msg: Message):
        """Execute TASK messages and send the result dict back to the requester."""
        if msg.type == MessageType.TASK:
            try:
                result = await self._handle_task(msg)
            except Exception as e:
                # Report the failure to the requester instead of dropping the task.
                logger.error(f"[{self.name}] Task handling failed: {e}", exc_info=True)
                result = {"error": f"Internal error: {e}"}

            target = msg.reply_to or msg.sender_id
            if target:
                await self.send(target, MessageType.RESULT, result)
            else:
                logger.warning(
                    f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
                    f"sender_id={msg.sender_id!r}). Result discarded: {result}"
                )

    async def _handle_task(self, msg: Message) -> dict:
        """Route a TASK message: dict payloads are dispatched, anything else is asked as text."""
        if not isinstance(msg.payload, dict):
            text = str(msg.payload).strip()
            if text:
                return await self._ask(text)
            return {"error": "Send a dict payload with 'action' key"}

        return await self._handle_task_payload(msg.payload)

    async def _handle_task_payload(self, payload: dict) -> dict:
        """Core task dispatcher — shared by both chat() and handle_message()."""
        # Coerce so a missing, None or non-string action cannot raise here.
        action = str(payload.get("action") or "").lower()

        if action == "load_manual":
            device = payload.get("device") or payload.get("query", "")
            if not device:
                return {"error": "Missing 'device' field"}
            return await self._load_manual(device)

        if action == "ask":
            question = payload.get("question") or payload.get("query") or payload.get("text", "")
            if not question:
                return {"error": "Missing 'question' field"}
            return await self._ask(question)

        if action == "status":
            return self._status()

        if action == "clear":
            self._manual_text = None
            self._manual_device = None
            self._manual_url = None
            self._manual_pages = 0
            return {"status": "cleared"}

        # No recognised action: a bare question/query is still answered.
        if "question" in payload or "query" in payload:
            return await self._ask(payload.get("question") or payload.get("query", ""))

        return {
            "error": f"Unknown action: '{action}'",
            "supported": ["load_manual", "ask", "status", "clear"],
        }

    # ── Load manual ────────────────────────────────────────────────────────

    async def _load_manual(self, device: str) -> dict:
        """
        Search for, download and extract the manual for *device*.

        Returns:
            A dict with ``success``/``device``/``url``/``pages``/``chars``/
            ``preview`` on success, or a dict with an ``error`` key.
        """
        await self._log(f"Searching for manual: {device}")

        # We are inside a coroutine, so the running loop is the one to use;
        # the blocking search and extraction steps run in its default executor.
        loop = asyncio.get_running_loop()
        pdf_url = await loop.run_in_executor(None, lambda: self._search_for_manual(device))

        if not pdf_url:
            await self._alert(f"No PDF manual found for: {device}", "warning")
            return {"error": f"Could not find a PDF manual for: {device}"}

        await self._log(f"Found: {pdf_url}")

        pdf_bytes = await self._download_pdf(pdf_url)
        if not pdf_bytes:
            return {"error": f"Failed to download PDF from: {pdf_url}"}

        size_kb = len(pdf_bytes) // 1024
        await self._log(f"Downloaded {size_kb} KB — extracting text...")

        text, pages = await loop.run_in_executor(None, lambda: self._extract_text(pdf_bytes))
        if not text:
            return {"error": "PDF has no extractable text (may be a scanned image PDF)."}

        self._manual_text = text
        self._manual_device = device
        self._manual_url = pdf_url
        self._manual_pages = pages

        await self._log(f"Manual loaded: {device} — {pages} pages, {len(text):,} chars")
        await self._publish_status()

        return {
            "success": True,
            "device": device,
            "url": pdf_url,
            "pages": pages,
            "chars": len(text),
            "preview": text[:300].replace("\n", " ").strip(),
        }

    # ── Search ─────────────────────────────────────────────────────────────

    def _search_for_manual(self, device: str) -> Optional[str]:
        """
        Run the search passes in order and return the first manual URL found.

        Blocking (synchronous HTTP); ``_load_manual`` calls it in an executor.
        Returns None when httpx is missing or every pass comes back empty.
        """
        try:
            import httpx
        except ImportError:
            logger.error(f"[{self.name}] httpx is not installed — cannot search for manuals")
            return None

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
        }

        # ── Pass 1: direct Philips document server (model number pattern) ──
        model_m = re.search(r'EP\d{4}', device, re.IGNORECASE)
        if model_m:
            model = model_m.group(0).upper()
            ml = model.lower()
            direct_urls = [
                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_pss_aenghk.pdf",
                f"https://www.download.p4c.philips.com/files/e/{ml}_31/{ml}_31_pss_aenghk.pdf",
                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_user_manual_en.pdf",
                f"https://www.documents.philips.com/doclib/enc/fetch/2000/4504/261257/261271/User_Manual_{model}.pdf",
            ]
            try:
                with httpx.Client(follow_redirects=True, timeout=10, headers=headers) as client:
                    for url in direct_urls:
                        try:
                            r = client.head(url)
                            ct = r.headers.get("content-type", "")
                            if r.status_code == 200 and ("pdf" in ct or url.endswith(".pdf")):
                                logger.info(f"[{self.name}] Direct URL works: {url}")
                                return url
                        except Exception as e:
                            logger.debug(f"[{self.name}] Direct URL failed ({url}): {e}")
                            continue
            except Exception as e:
                logger.warning(f"[{self.name}] Philips direct check failed: {e}")

        # ── Pass 2: DDGS search ────────────────────────────────────────────
        result = self._search_ddgs(device)
        if result:
            return result

        # ── Pass 3: Bing scrape (with redirect URL decoding) ───────────────
        result = self._search_bing_scrape(device, headers)
        if result:
            return result

        # ── Pass 4: Google scrape fallback ─────────────────────────────────
        result = self._search_google_scrape(device, headers)
        if result:
            return result

        logger.warning(f"[{self.name}] All search passes exhausted — no manual found for: {device}")
        return None

    # ── Pass 2: DDGS ──────────────────────────────────────────────────────

    def _search_ddgs(self, device: str) -> Optional[str]:
        """Query DuckDuckGo through ``ddgs`` (or legacy ``duckduckgo_search``) for a manual URL."""
        queries = [
            f"{device} user manual filetype:pdf",
            f"{device} user manual PDF manualslib OR manualzz",
            f"{device} owner manual PDF download",
        ]

        def get_url(r):
            # Result dicts name the link differently across package versions.
            return r.get("href") or r.get("url") or r.get("link") or ""

        try:
            try:
                from ddgs import DDGS
                logger.info(f"[{self.name}] Pass 2: using ddgs package")
            except ImportError:
                from duckduckgo_search import DDGS
                logger.info(f"[{self.name}] Pass 2: using duckduckgo_search (deprecated)")

            with DDGS() as ddgs:
                for query in queries:
                    try:
                        results = list(ddgs.text(query, max_results=15))
                        logger.info(f"[{self.name}] Pass 2 query: {query!r} → {len(results)} results")

                        for i, r in enumerate(results[:5]):
                            logger.info(
                                f"[{self.name}] [{i}] url={get_url(r)!r} "
                                f"title={r.get('title', '')[:60]!r}"
                            )

                        match = self._pick_best_url(results, get_url)
                        if match:
                            logger.info(f"[{self.name}] Pass 2 HIT: {match}")
                            return match

                    except Exception as e:
                        logger.warning(f"[{self.name}] DDGS query failed ({query}): {e}")
                        continue
        except ImportError:
            logger.warning(f"[{self.name}] Neither ddgs nor duckduckgo_search installed — skipping")

        return None

    # ── Pass 3 / Pass 4: search-engine scraping ───────────────────────────

    def _scrape_search_engine(
        self,
        *,
        pass_no: int,
        engine: str,
        search_url: str,
        queries: list[str],
        extract_urls,
        headers: dict,
    ) -> Optional[str]:
        """Fetch result pages for *queries* and return the best URL found, if any."""
        import httpx

        try:
            with httpx.Client(follow_redirects=True, timeout=15, headers=headers) as client:
                for query in queries:
                    try:
                        r = client.get(search_url + urllib.parse.quote(query))
                        urls = extract_urls(r.text)

                        logger.info(f"[{self.name}] Pass {pass_no} query: {query!r} → {len(urls)} real URLs")
                        for i, u in enumerate(urls[:10]):
                            logger.info(f"[{self.name}] [{i}] {u}")

                        # Build fake result dicts so we can reuse _pick_best_url
                        results = [{"href": u, "title": "", "body": ""} for u in urls]
                        match = self._pick_best_url(results, lambda r: r["href"])
                        if match:
                            logger.info(f"[{self.name}] Pass {pass_no} HIT: {match}")
                            return match

                    except Exception as e:
                        logger.warning(f"[{self.name}] {engine} query failed ({query}): {e}")
                        continue
        except Exception as e:
            logger.warning(f"[{self.name}] {engine} scrape failed entirely: {e}")

        return None

    def _search_bing_scrape(self, device: str, headers: dict) -> Optional[str]:
        """Scrape Bing result pages for a manual URL."""
        return self._scrape_search_engine(
            pass_no=3,
            engine="Bing",
            search_url="https://www.bing.com/search?q=",
            queries=[
                f"{device} user manual PDF",
                f"{device} manual PDF manualslib OR manualzz",
            ],
            extract_urls=self._extract_bing_urls,
            headers=headers,
        )

    def _search_google_scrape(self, device: str, headers: dict) -> Optional[str]:
        """Scrape Google result pages for a manual URL."""
        return self._scrape_search_engine(
            pass_no=4,
            engine="Google",
            search_url="https://www.google.com/search?q=",
            queries=[
                f"{device} user manual PDF",
                f"{device} manual filetype:pdf",
            ],
            extract_urls=self._extract_google_urls,
            headers=headers,
        )

    # ── URL extraction helpers ─────────────────────────────────────────────

    @staticmethod
    def _append_direct_links(html: str, urls: list, seen: set) -> None:
        """Append unseen absolute hrefs in *html* that are not search-engine links to *urls*."""
        for m in re.finditer(r'href=["\'](https?://[^"\'<>\s]+)', html):
            link = m.group(1)
            if not any(d in link for d in _SEARCH_ENGINE_DOMAINS) and link not in seen:
                seen.add(link)
                urls.append(link)

    @staticmethod
    def _extract_bing_urls(html: str) -> list[str]:
        """
        Extract real destination URLs from Bing search results HTML.
        Bing wraps links as /ck/a?...&u=a1<base64url>... — we decode those.
        Also picks up any direct href links that aren't bing/microsoft.
        """
        urls = []
        seen = set()

        # Method 1: decode Bing redirect URLs (/ck/a?...u=a1<base64>...)
        for m in re.finditer(r'href="https?://www\.bing\.com/ck/a\?[^"]*?u=a1([A-Za-z0-9_-]+)[^"]*"', html):
            try:
                encoded = m.group(1)
                # Restore the stripped base64url padding: 0-3 '=' characters.
                padded = encoded + "=" * (-len(encoded) % 4)
                decoded = base64.urlsafe_b64decode(padded).decode("utf-8", errors="ignore")
                if decoded.startswith("http") and decoded not in seen:
                    seen.add(decoded)
                    urls.append(decoded)
            except Exception:
                # Undecodable redirect token — skip this link only.
                continue

        # Method 2: direct hrefs that aren't search engine domains
        ManualAgent._append_direct_links(html, urls, seen)

        return urls

    @staticmethod
    def _extract_google_urls(html: str) -> list[str]:
        """
        Extract real destination URLs from Google search results HTML.
        Google wraps links as /url?q=<url>&... — we extract the q parameter.
        """
        urls = []
        seen = set()

        # Method 1: Google redirect links
        for m in re.finditer(r'/url\?q=(https?://[^&"]+)', html):
            try:
                decoded = urllib.parse.unquote(m.group(1))
                if not any(d in decoded for d in _SEARCH_ENGINE_DOMAINS) and decoded not in seen:
                    seen.add(decoded)
                    urls.append(decoded)
            except Exception:
                continue

        # Method 2: direct hrefs
        ManualAgent._append_direct_links(html, urls, seen)

        return urls

    # ── Shared URL ranking ─────────────────────────────────────────────────

    def _pick_best_url(self, results: list[dict], get_url_fn) -> Optional[str]:
        """
        From a list of search results, pick the best manual URL.
        Priority: direct .pdf link > trusted site > any link with 'manual' + 'pdf' signals.

        Args:
            results: Search result dicts (``title``/``body`` are read when present).
            get_url_fn: Callable returning the URL string of one result dict.
        """
        # Tier 1: direct .pdf link
        for r in results:
            u = get_url_fn(r)
            if u.lower().endswith(".pdf"):
                return u

        # Tier 2: trusted manual site
        for r in results:
            u = get_url_fn(r)
            if any(t in u for t in TRUSTED_SITES):
                # ManualsLib pages need /download.pdf appended
                if "manualslib.com" in u and not u.endswith(".pdf"):
                    return u.rstrip("/") + "/download.pdf"
                return u

        # Tier 3: URL contains 'manual' or 'pdf' (but not a search engine)
        for r in results:
            u = get_url_fn(r)
            u_lower = u.lower()
            if u.startswith("http") and ("manual" in u_lower or "pdf" in u_lower):
                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
                    return u

        # Tier 4: body/title mentions 'pdf' or 'manual'
        for r in results:
            u = get_url_fn(r)
            text = (r.get("body", "") + r.get("title", "")).lower()
            if ("pdf" in text or "manual" in text) and u.startswith("http"):
                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
                    return u

        return None

    # ── Download ───────────────────────────────────────────────────────────

    async def _download_pdf(self, url: str) -> Optional[bytes]:
        """
        Download *url* and return PDF bytes, or None on any failure.

        When the response is not a PDF, the first ``.pdf`` link found in the
        response text is followed once.
        """
        try:
            import httpx
        except ImportError:
            logger.error(f"[{self.name}] httpx is not installed — cannot download PDF")
            return None

        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
        }
        try:
            async with httpx.AsyncClient(follow_redirects=True, timeout=60, headers=headers) as client:
                resp = await client.get(url)
                if resp.status_code != 200:
                    logger.warning(f"[{self.name}] Download returned status {resp.status_code} for: {url}")
                    return None
                ct = resp.headers.get("content-type", "")
                # Accept by declared type or by the PDF magic bytes.
                if "pdf" in ct or resp.content[:4] == b"%PDF":
                    return resp.content
                # HTML — hunt for embedded PDF link
                links = re.findall(r'https?://[^\s"\'<>]+\.pdf', resp.text, re.IGNORECASE)
                if links:
                    logger.info(f"[{self.name}] Following embedded PDF link: {links[0]}")
                    r2 = await client.get(links[0])
                    if r2.status_code == 200 and r2.content[:4] == b"%PDF":
                        return r2.content
                logger.warning(f"[{self.name}] URL did not return a PDF: {url} (content-type: {ct})")
        except Exception as e:
            logger.warning(f"[{self.name}] Download failed for {url}: {e}")
        return None

    # ── Extract text ───────────────────────────────────────────────────────

    def _extract_text(self, pdf_bytes: bytes) -> tuple[str, int]:
        """
        Extract text from PDF bytes with pdfplumber, falling back to pymupdf.

        Returns:
            ``(text, page_count)``; ``("", 0)`` when neither backend yields text.
        """
        import io
        try:
            import pdfplumber
            parts = []
            with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
                pages = len(pdf.pages)
                for p in pdf.pages:
                    t = p.extract_text()
                    if t:
                        parts.append(t)
            if parts:
                return "\n".join(parts), pages
        except ImportError:
            logger.warning(f"[{self.name}] pdfplumber not installed — trying pymupdf")
        except Exception as e:
            logger.warning(f"[{self.name}] pdfplumber extraction failed: {e}")

        try:
            import fitz
            doc = fitz.open(stream=pdf_bytes, filetype="pdf")
            try:
                parts = [p.get_text() for p in doc]
                return "\n".join(t for t in parts if t), len(doc)
            finally:
                # Release the document even when extraction raises.
                doc.close()
        except ImportError:
            logger.error(f"[{self.name}] Neither pdfplumber nor pymupdf (fitz) installed — cannot extract text")
        except Exception as e:
            logger.warning(f"[{self.name}] pymupdf extraction failed: {e}")

        return "", 0

    # ── Ask ────────────────────────────────────────────────────────────────

    async def _ask(self, question: str) -> dict:
        """
        Answer *question* from the loaded manual using the configured LLM.

        Returns:
            A dict with ``device``/``question``/``answer``, or a dict with an
            ``error`` key when no manual or no LLM is available.
        """
        if not self._manual_text:
            return {
                "error": "No manual loaded yet.",
                "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
            }
        if not self.llm:
            return {"error": "No LLM configured on this agent."}

        await self._log(f"Answering: {question}")

        # Keep only the six chunks that mention the question's keywords most.
        chunks = self._chunk_text(self._manual_text, 600, 100)
        ranked = self._rank_chunks(chunks, question)[:6]
        context = "\n\n---\n\n".join(ranked)

        prompt = (
            f"You are a helpful assistant. Answer the question below using ONLY the provided manual excerpt.\n\n"
            f"Device: {self._manual_device}\n\n"
            f"Manual excerpt:\n{context[:6000]}\n\n"
            f"Question: {question}\n\n"
            f"Give a clear, step-by-step answer based on the manual. "
            f"If the manual doesn't contain the answer, say so."
        )

        if hasattr(self.llm, "complete"):
            response, _ = await self.llm.complete(
                messages=[{"role": "user", "content": prompt}],
                system="You answer questions strictly based on provided manual content.",
            )
        else:
            response = str(self.llm)

        return {
            "device": self._manual_device,
            "question": question,
            "answer": response,
        }

    # ── Status ─────────────────────────────────────────────────────────────

    def _status(self) -> dict:
        """Describe the currently loaded manual, or report the idle state."""
        if not self._manual_device:
            return {"status": "idle", "message": "No manual loaded."}
        return {
            "status": "loaded",
            "device": self._manual_device,
            "url": self._manual_url,
            "pages": self._manual_pages,
            "chars": len(self._manual_text or ""),
        }

    # ── Helpers ────────────────────────────────────────────────────────────

    # Words ignored when extracting keywords from a question
    _STOPWORDS = {
        'how','do','i','the','a','an','is','are','what','where','when','why',
        'can','does','to','for','of','in','on','at','my','this','that','it',
        'its','with','and','or','be','was','will','has','have','use','using',
        'get','me','please','tell','about','there','their','they','we','you',
        'your','which','make','need',
    }

    def _keywords(self, text: str) -> list[str]:
        """Return lowercase alphabetic words of *text* longer than 2 chars that are not stopwords."""
        words = re.findall(r'[a-z]+', text.lower())
        return [w for w in words if w not in self._STOPWORDS and len(w) > 2]

    def _chunk_text(self, text: str, chunk_size=600, overlap=100) -> list[str]:
        """
        Split *text* into chunks of ``chunk_size`` words sharing ``overlap`` words.

        Raises:
            ValueError: If ``chunk_size`` is smaller than 1.
        """
        if chunk_size < 1:
            raise ValueError("chunk_size must be at least 1")
        # Always advance by at least one word: overlap >= chunk_size would
        # otherwise never move the window forward and loop forever.
        step = max(1, chunk_size - overlap)
        words = text.split()
        return [" ".join(words[i:i + chunk_size]) for i in range(0, len(words), step)]

    def _rank_chunks(self, chunks: list[str], question: str) -> list[str]:
        """Order *chunks* by descending keyword-occurrence count (ties keep original order)."""
        kws = self._keywords(question)
        scored = []
        for c in chunks:
            lowered = c.lower()  # lowercase once per chunk, not once per keyword
            scored.append((sum(lowered.count(kw) for kw in kws), c))
        scored.sort(key=lambda x: x[0], reverse=True)
        return [c for _, c in scored]

    # ── MQTT helpers ───────────────────────────────────────────────────────

    async def _log(self, msg: str):
        """Log *msg* locally and publish it on the agent's MQTT log topic."""
        logger.info(f"[{self.name}] {msg}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": msg, "timestamp": time.time()},
        )

    async def _alert(self, msg: str, severity: str = "warning"):
        """Log *msg* as a warning and publish it on the agent's MQTT alerts topic."""
        logger.warning(f"[{self.name}] ALERT: {msg}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/alerts",
            {"message": msg, "severity": severity, "timestamp": time.time()},
        )
Pre-defined agent that finds, downloads, and answers questions from device manuals. Requires: httpx (+ pdfplumber or pymupdf for PDF extraction)
def __init__(self, llm_provider=None, **kwargs):
    """
    Create the agent with no manual loaded.

    The actor name defaults to "manual-agent" unless the caller supplies one;
    `llm_provider` is stored as `self.llm`.
    """
    if "name" not in kwargs:
        kwargs["name"] = "manual-agent"
    super().__init__(**kwargs)

    self.llm = llm_provider

    # Nothing is loaded until a manual has been fetched
    self._manual_pages: int = 0
    self._manual_url: Optional[str] = None
    self._manual_device: Optional[str] = None
    self._manual_text: Optional[str] = None
async def on_start(self):
    """Publish a "ready" record on this agent's MQTT log topic, then log locally."""
    log_topic = f"agents/{self.actor_id}/logs"
    ready_record = {
        "type": "log",
        "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.",
        "timestamp": time.time(),
    }
    await self._mqtt_publish(log_topic, ready_record)
    logger.info(f"[{self.name}] Ready.")
Called when actor starts. Override for init logic.
async def chat(self, message: str) -> str:
    """
    Direct-call entry point (e.g. for CLIInterface and similar callers).

    The stripped message is treated as a task payload when it parses as a
    non-empty JSON object; otherwise it is treated as a plain-text question
    about the loaded manual. If no manual is loaded, an error/hint dict is
    produced instead. The outcome is passed through ``self._format_result``
    and that value is returned.
    """
    text = message.strip()

    # Only attempt JSON decoding when the text looks like a JSON object.
    parsed = None
    if text.startswith("{"):
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError:
            parsed = None  # malformed JSON falls through to the question path

    if isinstance(parsed, dict) and parsed:
        outcome = await self._handle_task_payload(parsed)
    elif self._manual_text:
        outcome = await self._ask(text)
    else:
        outcome = {
            "error": "No manual loaded yet.",
            "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
        }

    return self._format_result(outcome)
Synchronous-style entry point for CLIInterface and other direct callers. Parses the message as JSON payload or plain-text question, executes the action, and returns a human-readable string response.
async def handle_message(self, msg: Message):
    """Process a TASK message and send its result back to the requester.

    The task is run through ``self._handle_task``; any exception is logged
    with its traceback and converted into an ``{"error": ...}`` result. The
    result is sent as a RESULT message to ``msg.reply_to`` or, failing that,
    ``msg.sender_id``. When neither is set, the result is dropped and a
    warning is logged.
    """
    if msg.type == MessageType.TASK:
        try:
            outcome = await self._handle_task(msg)
        except Exception as exc:
            logger.error(f"[{self.name}] Task handling failed: {exc}", exc_info=True)
            outcome = {"error": f"Internal error: {exc}"}

        # Prefer the explicit reply address; fall back to the sender.
        recipient = msg.reply_to or msg.sender_id
        if not recipient:
            logger.warning(
                f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
                f"sender_id={msg.sender_id!r}). Result discarded: {outcome}"
            )
        else:
            await self.send(recipient, MessageType.RESULT, outcome)
Handle messages not caught by default handlers.
37class PlannerAgent(Actor): 38 """ 39 On-demand orchestrator. Spawned per complex task, self-terminates when done. 40 """ 41 42 def __init__( 43 self, 44 llm_provider: Optional[LLMProvider] = None, 45 task: str = "", 46 reply_to_id: str = "", 47 reply_task_id: str = "", 48 auto_terminate: bool = True, 49 **kwargs, 50 ): 51 kwargs.setdefault("name", "planner") 52 super().__init__(**kwargs) 53 self.llm = llm_provider 54 self._task = task 55 self._reply_to_id = reply_to_id 56 self._reply_task_id = reply_task_id 57 self._auto_terminate = auto_terminate 58 self._result_futures: dict[str, asyncio.Future] = {} 59 self._spawned_by_planner: list[str] = [] # agents we created this run 60 61 def _current_task_description(self) -> str: 62 return self._task[:60] if self._task else "waiting for task" 63 64 # ── Lifecycle ────────────────────────────────────────────────────────── 65 66 async def on_start(self): 67 await self._log(f"Planner ready. Task: {self._task[:80]}") 68 if self._task: 69 asyncio.create_task(self._report_plan(self._task)) 70 71 # ── Message handling ─────────────────────────────────────────────────── 72 73 async def handle_message(self, msg: Message): 74 if msg.type == MessageType.TASK: 75 payload = msg.payload if isinstance(msg.payload, dict) else {"text": str(msg.payload)} 76 task_text = payload.get("text") or payload.get("task") or str(msg.payload) 77 self._reply_to_id = payload.get("_reply_to") or msg.reply_to or msg.sender_id or self._reply_to_id 78 task_id = payload.get("_task_id") 79 await self._log(f"Received task: {task_text[:80]}") 80 result = await self._run_plan(task_text) 81 if self._reply_to_id: 82 # Use the initiating task_id (from main) so the future resolves, 83 # falling back to the message-level task_id if present 84 resolve_id = self._reply_task_id or task_id 85 reply = {"result": result, "text": result} 86 if resolve_id: 87 reply["_task_id"] = resolve_id 88 if self._spawned_by_planner: 89 reply["spawned"] = self._spawned_by_planner 90 
await self.send(self._reply_to_id, MessageType.RESULT, reply) 91 92 elif msg.type == MessageType.RESULT: 93 payload = msg.payload if isinstance(msg.payload, dict) else {} 94 task_id = payload.get("_task_id") 95 if task_id and task_id in self._result_futures: 96 fut = self._result_futures[task_id] 97 if not fut.done(): 98 fut.set_result(payload) 99 100 # ── Report wrapper (on_start path) ──────────────────────────────────── 101 102 async def _report_plan(self, task: str): 103 """Run the plan and report the result back to main (used when task set at spawn time).""" 104 result = await self._run_plan(task) 105 if self._reply_to_id: 106 reply = {"result": result, "text": result} 107 if self._reply_task_id: 108 reply["_task_id"] = self._reply_task_id 109 if self._spawned_by_planner: 110 reply["spawned"] = self._spawned_by_planner 111 await self.send(self._reply_to_id, MessageType.RESULT, reply) 112 113 # ── Core pipeline ────────────────────────────────────────────────────── 114 115 # ── Pipeline registry ────────────────────────────────────────────────── 116 # Each pipeline rule is stored here so users can list / delete them later. 117 # Stored in persistent state under key "_pipeline_rules". 118 # 119 # Schema per rule: 120 # { 121 # "rule_id": str, # unique slug 122 # "task": str, # original user request 123 # "agents": [str], # names of spawned agents for this rule 124 # "created_at": float, 125 # } 126 127 def _load_pipeline_rules(self) -> list[dict]: 128 return self.recall("_pipeline_rules") or [] 129 130 def _save_pipeline_rule(self, rule: dict): 131 rules = self._load_pipeline_rules() 132 rules = [r for r in rules if r.get("rule_id") != rule["rule_id"]] 133 rules.append(rule) 134 self.persist("_pipeline_rules", rules) 135 136 # ── Pipeline detection & dispatch ────────────────────────────────────── 137 138 def _is_pipeline_request(task: str) -> bool: 139 """ 140 Detect reactive/persistent pipeline requests vs one-shot tasks. 
141 Pipelines use conditional/temporal language: if/when/whenever/monitor/watch/notify. 142 """ 143 import re 144 lowered = task.lower() 145 146 # Explicit pipeline prefix always wins 147 if lowered.startswith("pipeline:") or lowered.startswith("pipeline "): 148 return True 149 150 patterns = [ 151 r"\bif\b.*\bthen\b", 152 r"\bif\b.*\b(send|notify|alert|turn|open|close|post|message)\b", 153 r"\bwhen\b.*\b(detect|open|turn|send|notify|alert|is|becomes|goes|changes)\b", 154 r"\bwhenever\b", 155 r"\bmonitor\b", r"\bwatch\b", 156 r"\balert me\b", r"\bnotify me\b", 157 r"\bsend me\b.*\b(when|if|discord|message|notification)\b", 158 r"\bsend me a\b", 159 r"\bautomatically\b", 160 r"\bevery time\b", r"\bon detection\b", 161 r"\bis turned on\b", r"\bis turned off\b", 162 r"\bturns on\b", r"\bturns off\b", 163 r"\bopens\b.*\b(send|notify|alert|light|turn)\b", 164 r"\b(door|window|sensor|lamp|light|temperature|humidity|motion)\b.*\b(send|notify|discord|message)\b", 165 # camera/detect + action = pipeline 166 r"\b(camera|detect|yolo|webcam)\b.*\b(turn|open|send|notify|alert)\b", 167 r"\b(person|motion|object)\b.*\bdetect.*\b(turn|open|light|send)\b", 168 ] 169 return any(re.search(p, lowered) for p in patterns) 170 171 async def _run_plan(self, task: str) -> str: 172 workers = self._discover_workers() 173 await self._log(f"Workers available: {[w['name'] for w in workers]}") 174 175 # Detect pipeline vs one-shot 176 is_pipeline = PlannerAgent._is_pipeline_request(task) 177 if is_pipeline: 178 await self._log("Pipeline request detected — spawning persistent agents...") 179 return await self._run_pipeline(task, workers) 180 181 # ── 1. 
Check cache ───────────────────────────────────────────────── 182 cache_key = _task_hash(task) 183 cached = self._load_cached_plan(cache_key, workers) 184 if cached: 185 await self._log(f"Cache hit — reusing plan ({len(cached)} steps)") 186 plan = cached 187 else: 188 await self._log("No cache hit — generating plan with LLM...") 189 plan = await self._decompose(task, workers) 190 if not plan: 191 await self._log("Decomposition failed — answering directly") 192 return await self._llm_answer(task) 193 194 # ── 2. Spawn any missing agents declared in the plan ─────────────── 195 plan = await self._ensure_agents(plan) 196 197 # ── 3. Execute ───────────────────────────────────────────────────── 198 await self._log(f"Executing {len(plan)} step(s)...") 199 results = await self._execute(plan) 200 201 # ── 4. Synthesize ────────────────────────────────────────────────── 202 answer = await self._synthesize(task, plan, results) 203 204 # ── 5. Cache successful plan ─────────────────────────────────────── 205 if not cached: 206 self._save_plan_cache(cache_key, task, plan) 207 await self._log("Plan cached for future reuse.") 208 209 await self._log("Task complete.") 210 if self._auto_terminate: 211 asyncio.create_task(self._deferred_stop()) 212 213 return answer 214 215 # ── Pipeline mode (persistent reactive agents) ───────────────────────── 216 217 218 async def _run_pipeline(self, task: str, workers: list[dict]) -> str: 219 """ 220 Builds and spawns persistent reactive agents for if/when/whenever rules. 221 222 Flow: 223 1. _decompose_pipeline queries HomeAssistantAgent for real entity IDs 224 2. LLM produces spawn configs (ha_actuator for HA actions, dynamic for everything else) 225 3. Each agent is spawned and registered in main's spawn registry 226 4. Rule is saved so it can be listed/deleted later 227 5. Summary returned to the user 228 229 Multiple rules in one request are fully supported. 
230 """ 231 plan = await self._decompose_pipeline(task, workers) 232 233 if not plan: 234 await self._log("Pipeline decomposition failed — falling back to direct answer") 235 return await self._llm_answer(task) 236 237 if len(plan) == 1 and "_feasibility_error" in plan[0]: 238 error = plan[0]["_feasibility_error"] 239 await self._log(f"Pipeline not feasible: {error}") 240 return f"Cannot set up this pipeline:\n\n{error}" 241 242 await self._log(f"Pipeline plan: {len(plan)} agent(s)") 243 spawned: list[str] = [] 244 wired: list[str] = [] 245 rule_agents: list[str] = [] 246 247 for step in plan: 248 name = step.get("name", "").strip() 249 description = step.get("description", "") 250 spawn_cfg = step.get("spawn_config") 251 252 if not name: 253 await self._log("Step missing name — skipping") 254 continue 255 256 if self._registry and self._registry.find_by_name(name): 257 await self._log(f"'{name}' already running — skipping") 258 wired.append(f"**{name}** (already active)") 259 rule_agents.append(name) 260 continue 261 262 if not spawn_cfg: 263 await self._log(f"Step '{name}' has no spawn_config — skipping") 264 continue 265 266 spawn_cfg = dict(spawn_cfg) 267 spawn_cfg["name"] = name 268 269 spawn_type = spawn_cfg.get("type", "dynamic") 270 await self._log(f"Spawning '{name}' (type={spawn_type})...") 271 try: 272 actor = await self._spawn_agent(spawn_cfg) 273 except Exception as e: 274 await self._log(f"Spawn failed for '{name}': {e}") 275 wired.append(f"**{name}** — spawn failed: {e}") 276 continue 277 278 if actor: 279 self._spawned_by_planner.append(name) 280 spawned.append(name) 281 rule_agents.append(name) 282 283 # Register in main's spawn registry for auto-restore on restart 284 if self._registry: 285 main = self._registry.find_by_name("main") 286 if main and hasattr(main, "_save_to_spawn_registry"): 287 registry_cfg = dict(spawn_cfg) 288 registry_cfg["name"] = name 289 registry_cfg["_rule"] = True 290 registry_cfg["_rule_task"] = task[:200] 291 
main._save_to_spawn_registry(registry_cfg) 292 293 topics = spawn_cfg.get("mqtt_topics", []) 294 label = f"**{name}** — {description}" 295 if topics: 296 label += "\n listens: " + ", ".join(topics) 297 wired.append(label) 298 await asyncio.sleep(0.3) 299 else: 300 wired.append(f"**{name}** — failed to spawn") 301 302 # Persist this rule into main's pipeline rules registry 303 if rule_agents: 304 import hashlib as _hl 305 rule_id = _hl.md5(task.encode()).hexdigest()[:8] 306 rule = { 307 "rule_id": rule_id, 308 "task": task, 309 "agents": rule_agents, 310 "created_at": time.time(), 311 } 312 # Save into main so it survives planner self-termination 313 if self._registry: 314 main = self._registry.find_by_name("main") 315 if main and hasattr(main, "save_pipeline_rule"): 316 main.save_pipeline_rule(rule) 317 logger.info(f"[{self.name}] Pipeline rule {rule_id} saved to main") 318 319 self._auto_terminate = False 320 321 if not wired: 322 return "Pipeline plan generated but no agents could be spawned. Check logs." 323 324 out = ["Pipeline active! Here's what I set up:\n"] 325 out += [f"{i+1}. {w}" for i, w in enumerate(wired)] 326 out.append("\nThese agents run continuously and react to events automatically.") 327 out.append("Use `/rules` to see all active pipeline rules.") 328 if spawned: 329 out.append(f"\nSpawned: {', '.join(spawned)} — will auto-restore on restart.") 330 return "\n".join(out) 331 332 async def _decompose_pipeline(self, task: str, workers: list[dict]) -> list[dict]: 333 """ 334 Decomposes a reactive pipeline request into persistent agent spawn configs. 335 336 Flow: 337 1. Query HomeAssistantAgent for live entities (delegates — no duplication) 338 2. Feasibility check — surface clear error if required HA entities are missing 339 3. LLM produces spawn configs with real entity IDs and correct MQTT wiring 340 """ 341 if not self.llm: 342 return [] 343 344 # ── 1. 
Get HA entities via HomeAssistantAgent ────────────────────── 345 ha_entities_text = "" 346 ha_available = False 347 348 try: 349 if self._registry and self._registry.find_by_name("home-assistant-agent"): 350 result = await self._delegate("home-assistant-agent", "list_entities") 351 if result and not result.get("error"): 352 entities_list = result.get("entities", []) 353 if entities_list: 354 lines = [] 355 for e in entities_list[:200]: 356 eid = e.get("entity_id", "") 357 ename = e.get("name", "") 358 plat = e.get("platform", "") 359 if eid: 360 parts = [eid] 361 if ename and ename != eid: 362 parts.append(f"name={ename}") 363 if plat: 364 parts.append(f"platform={plat}") 365 lines.append(" " + " ".join(parts)) 366 ha_entities_text = "\n".join(lines) 367 ha_available = True 368 logger.info(f"[{self.name}] Got {len(entities_list)} HA entities via home-assistant-agent") 369 except Exception as e: 370 logger.warning(f"[{self.name}] Could not query home-assistant-agent: {e}") 371 372 # Fallback: fetch directly if HA agent is unavailable 373 if not ha_available: 374 try: 375 from ..config import CONFIG 376 from ..core.integrations.home_assistant.ha_helper import fetch_devices_entities_with_location 377 ha_url = (CONFIG.ha_url or "").rstrip("/") 378 ha_token = (CONFIG.ha_token or "").strip() 379 if ha_url and ha_token: 380 devices = await fetch_devices_entities_with_location(ha_url, ha_token, include_states=True) 381 lines = [] 382 for device in devices[:150]: 383 area = device.get("area", "") 384 for entity in device.get("entities", []): 385 eid = entity.get("entity_id", "") 386 ename = entity.get("friendly_name") or entity.get("name", "") 387 state = entity.get("state", "") 388 if eid: 389 parts = [eid] 390 if ename: parts.append(f"name={ename}") 391 if area: parts.append(f"area={area}") 392 if state: parts.append(f"state={state}") 393 lines.append(" " + " ".join(parts)) 394 ha_entities_text = "\n".join(lines) 395 ha_available = bool(lines) 396 
logger.info(f"[{self.name}] Direct HA fetch: {len(lines)} entities") 397 except Exception as e: 398 logger.warning(f"[{self.name}] Direct HA fetch failed: {e}") 399 400 ha_section = ha_entities_text if ha_entities_text else \ 401 " (HA not reachable — use entity IDs provided by the user)" 402 403 # ── Fetch stored notification URLs from main ────────────────────── 404 notification_urls: dict = {} 405 if self._registry: 406 main = self._registry.find_by_name("main") 407 if main and hasattr(main, "get_notification_urls"): 408 notification_urls = main.get_notification_urls() 409 410 # Also extract any URL directly mentioned in the task 411 import re as _re 412 _url_match = _re.search( 413 r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+', 414 task 415 ) 416 if _url_match: 417 url = _url_match.group(0).rstrip(".,;!)'\"") 418 if "discord" in url: 419 notification_urls["discord"] = url 420 elif "slack" in url: 421 notification_urls["slack"] = url 422 elif "telegram" in url: 423 notification_urls["telegram"] = url 424 425 notif_section = "" 426 if notification_urls: 427 lines = ["NOTIFICATION URLS (use these directly in code — do not use placeholders):"] 428 for svc, url in notification_urls.items(): 429 lines.append(f" {svc}: {url}") 430 notif_section = "\n".join(lines) 431 else: 432 notif_section = ( 433 "NOTIFICATION URLS: none stored.\n" 434 "If the user wants Discord/Slack/Telegram notifications and no URL is available,\n" 435 "use a placeholder 'WEBHOOK_URL_REQUIRED' and set description to explain the user must run:\n" 436 " /webhook discord <url>" 437 ) 438 _local_kw = ("camera", "webcam", "laptop", "detect", "yolo", "person", 439 "object detection", "cv2", "opencv", 440 "discord", "telegram", "slack", "notify", "notification", "message") 441 _skip_feasibility = any(kw in task.lower() for kw in _local_kw) 442 443 if ha_available and ha_entities_text and not _skip_feasibility: 444 feas_prompt = ( 445 "Check if this reactive 
automation can be fulfilled with available HA entities.\n\n" 446 f"USER REQUEST: {task}\n\n" 447 f"AVAILABLE HA ENTITIES:\n{ha_section}\n\n" 448 'Return JSON only:\n' 449 '{"feasible": true/false, "reason": "<one sentence if not feasible>", "relevant_entities": ["entity_id", ...]}\n\n' 450 "Rules:\n" 451 "- feasible=true only if ALL required entity types exist\n" 452 "- Camera/webcam/Discord/notification requests: always feasible=true" 453 ) 454 try: 455 feas_resp, _ = await self.llm.complete( 456 messages=[{"role": "user", "content": feas_prompt}], 457 system="Output only valid JSON. No markdown.", 458 max_tokens=400, 459 ) 460 clean = feas_resp.strip() 461 for fence in ("```json", "```"): 462 if clean.startswith(fence): 463 clean = clean[len(fence):] 464 if clean.endswith("```"): 465 clean = clean[:-3] 466 clean = clean.strip() 467 feas = json.loads(clean) 468 if not feas.get("feasible", True): 469 reason = feas.get("reason", "Cannot fulfill request with available HA entities.") 470 logger.warning(f"[{self.name}] Feasibility failed: {reason}") 471 return [{"_feasibility_error": reason}] 472 logger.info(f"[{self.name}] Feasibility OK — relevant: {feas.get('relevant_entities', [])}") 473 except Exception as e: 474 logger.warning(f"[{self.name}] Feasibility check error (continuing): {e}") 475 476 # ── 3. 
Decompose into spawn configs ──────────────────────────────── 477 # Build the prompt as a list of parts to avoid f-string escape issues 478 prompt_parts = [ 479 "You are designing reactive automation pipelines for a multi-agent IoT system.", 480 "Output ONLY a valid JSON array — no explanation, no markdown, no code fences.", 481 "", 482 "═══ SYSTEM ARCHITECTURE ═══", 483 "", 484 "HomeAssistantStateBridgeAgent (ALWAYS running, NEVER spawn again):", 485 " Publishes every HA state change to MQTT.", 486 " Topic format depends on HA_STATE_BRIDGE_PER_ENTITY config — can be either:", 487 " Flat: homeassistant/state_changes (all entities, one topic)", 488 " Per-entity: homeassistant/state_changes/{domain}/{full_entity_id} (one topic per entity)", 489 " ALWAYS subscribe to the wildcard: homeassistant/state_changes/#", 490 " This catches BOTH formats and never breaks regardless of config.", 491 ' Payload always contains: {"entity_id": "light.wiz_...", "domain": "light", "new_state": {"state": "on", ...}, "old_state": {...}}', 492 " Filter by entity_id IN THE PAYLOAD — never rely on the topic path for filtering.", 493 " NOTE: 'state' is NESTED inside new_state — check payload['new_state']['state'].", 494 "", 495 "═══ AGENT TYPES ═══", 496 "", 497 'TYPE 1 — "ha_actuator"', 498 " Purpose: call any Home Assistant service (turn_on, turn_off, set_temperature, open_cover, etc.)", 499 " No code needed. 
Subscribes to an MQTT trigger topic and calls the HA service.", 500 " detection_filter matches TOP-LEVEL keys of the incoming payload only.", 501 " spawn_config schema:", 502 ' "type": "ha_actuator"', 503 ' "automation_id": "<unique-kebab-id>"', 504 ' "description": "<what this does>"', 505 ' "mqtt_topics": ["<trigger-topic>"]', 506 ' "actions": [{"domain": "<ha-domain>", "service": "<ha-service>", "entity_id": "<entity_id-from-list>", "service_data": {}}]', 507 ' "conditions": []', 508 ' "detection_filter": {"<top-level-key>": <value>} or null', 509 ' "cooldown_seconds": <number>', 510 "", 511 'TYPE 2 — "dynamic"', 512 " Purpose: any logic that needs code — state filtering, webcam, timers, HTTP webhooks, Discord, etc.", 513 " Define these async functions (all optional except at least one must exist):", 514 " async def setup(agent) — runs once on start, good for subscriptions and init", 515 " async def process(agent) — runs in a loop every poll_interval seconds", 516 " Available APIs (ONLY these — no other agent methods exist):", 517 ' await agent.log("message") — structured log', 518 ' await agent.publish("topic", {dict}) — publish to MQTT', 519 ' agent.subscribe("topic", async_callback) — subscribe to MQTT, callback(payload_dict) per message', 520 ' IMPORTANT: runs as background task, setup() returns immediately', 521 ' agent.recall("key") — load persisted value', 522 ' agent.persist("key", value) — save persisted value', 523 ' agent.state["key"] — in-memory dict (cleared on restart)', 524 " CRITICAL RULES FOR DYNAMIC AGENT CODE:", 525 " NEVER import or use aiomqtt directly — use agent.subscribe() instead", 526 " NEVER hardcode MQTT broker hostnames or ports — agent.subscribe() handles this automatically", 527 " NEVER use asyncio.create_task() for MQTT — agent.subscribe() already creates the background task", 528 " agent.subscribe() is non-blocking — call it in setup() and return immediately", 529 " spawn_config schema:", 530 ' "type": "dynamic"', 531 ' 
"description": "<what this does>"', 532 ' "install": ["<pip-package>", ...] — packages to install before running', 533 ' "poll_interval": <seconds> — how often process(agent) runs', 534 ' "code": "<full python source as single string with \\n for newlines>"', 535 "", 536 "═══ CANONICAL WIRING PATTERNS ═══", 537 "", 538 "PATTERN 1 — HA sensor triggers HA action (door → light, motion → switch, temp → AC):", 539 " Problem: HA state is nested in new_state.state, ha_actuator can only filter top-level keys.", 540 " Solution: use a dynamic filter agent to extract and re-publish the trigger.", 541 " Agent 1 (dynamic, name: '<slug>-state-filter'):", 542 " setup(agent): use agent.subscribe() to listen to homeassistant/state_changes/{domain}/{entity_id}", 543 " Check new_state['state'] against condition, if met: await agent.publish('custom/triggers/<slug>', {'triggered': True})", 544 " agent.subscribe() runs as a background task — setup() must return immediately after calling it.", 545 " Agent 2 (ha_actuator, name: '<slug>-actuator'):", 546 " mqtt_topics: ['custom/triggers/<slug>']", 547 " detection_filter: {'triggered': True}", 548 " actions: [the HA service call with the correct entity_id]", 549 " CONDITION EXAMPLES:", 550 " Binary sensor (door/window/motion): new_state['state'] == 'on'", 551 " Numeric sensor (temperature/humidity): float(new_state.get('state', 0)) > threshold", 552 " Switch/light: new_state['state'] == 'on' or 'off'", 553 " PATTERN 1 CODE TEMPLATE:", 554 " async def setup(agent):", 555 " async def on_state(payload):", 556 " if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return", 557 " state = payload.get('new_state', {}).get('state', '')", 558 " if state == 'on': # adapt condition to user request", 559 " await agent.publish('custom/triggers/<slug>', {'triggered': True, 'state': state})", 560 " # Use wildcard — works regardless of per-entity or flat topic config", 561 " agent.subscribe('homeassistant/state_changes/#', on_state)", 562 "", 
563 "PATTERN 2 — HA sensor triggers notification (Discord, Slack, HTTP webhook):", 564 " ONE dynamic agent using agent.subscribe():", 565 " async def setup(agent):", 566 " async def on_state(payload):", 567 " if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return", 568 " state = payload.get('new_state', {}).get('state', '')", 569 " if state == 'on': # adapt condition", 570 " import httpx", 571 " async with httpx.AsyncClient() as c:", 572 " await c.post('<WEBHOOK_URL>', json={'content': 'Lamp turned on!'})", 573 " await agent.log('Discord notification sent')", 574 " # Use wildcard — works regardless of per-entity or flat topic config", 575 " agent.subscribe('homeassistant/state_changes/#', on_state)", 576 " Install: httpx", 577 " IMPORTANT: use the exact webhook URL from NOTIFICATION URLS section below.", 578 "", 579 "PATTERN 3 — Webcam/camera object detection triggers HA action:", 580 " Agent 1 (dynamic, name: '<slug>-camera-detect'):", 581 " setup(agent): load YOLO model and open camera", 582 " process(agent): capture frame, run inference, determine if target object is detected,", 583 " publish {'detected': bool, 'target': '<object-name>', 'objects': [list-of-all-detected]}", 584 " to custom/detections/<slug>", 585 " Install: ultralytics, opencv-python", 586 " poll_interval: 1", 587 " Agent 2 (ha_actuator, name: '<slug>-actuator'):", 588 " mqtt_topics: ['custom/detections/<slug>']", 589 " detection_filter: {'detected': True}", 590 " actions: [HA service call]", 591 " IMPORTANT: publish {'detected': bool} not {'person_detected': bool} — generic for any object.", 592 " In code: target = '<object-name-from-user-request>'; detected = target in set(detected_labels)", 593 "", 594 "PATTERN 4 — Webcam detection triggers notification:", 595 " Agent 1: same as Pattern 3 agent 1", 596 " Agent 2 (dynamic, name: '<slug>-notify'):", 597 " setup(agent): use agent.subscribe() on custom/detections/<slug>", 598 " When detected=True: POST notification via httpx", 599 
"", 600 "PATTERN 5 — Timer/schedule triggers HA action:", 601 " Agent 1 (dynamic, name: '<slug>-timer'):", 602 " process(agent): check current time (import datetime), if matches schedule:", 603 " await agent.publish('custom/triggers/<slug>', {'triggered': True})", 604 " poll_interval: 60", 605 " Agent 2 (ha_actuator): subscribes to custom/triggers/<slug>", 606 "", 607 "═══ GENERAL RULES ═══", 608 "- Use EXACT entity_id values from the HA entities list — never invent entity IDs", 609 "- For HA service calls: look up the correct domain and service for the entity type", 610 " light → light.turn_on / light.turn_off", 611 " switch → switch.turn_on / switch.turn_off", 612 " climate → climate.set_temperature / climate.set_hvac_mode", 613 " cover → cover.open_cover / cover.close_cover", 614 " script → script.turn_on", 615 "- Multiple rules in one request → output ALL agents for ALL rules", 616 "- Each agent does exactly ONE job — keep it minimal", 617 "- Replace <slug> consistently across paired agents with a short descriptive kebab-case id", 618 "- ALWAYS subscribe to homeassistant/state_changes/# (wildcard) — NEVER to a specific sub-topic", 619 " Filter by entity_id in the payload: if payload.get('entity_id') != 'light.xyz': return", 620 " This works regardless of whether HA_STATE_BRIDGE_PER_ENTITY is on or off", 621 "- If user provides a Discord webhook URL, use it directly in code", 622 "- If user provides a condition threshold (e.g. 'above 28 degrees'), encode it in the filter agent code", 623 "- Dynamic agent code must be a single string with actual \\n newlines (not literal backslash-n)", 624 "", 625 "═══ HOME ASSISTANT ENTITIES ═══", 626 ha_section, 627 "", 628 "═══ NOTIFICATION URLS ═══", 629 notif_section, 630 "", 631 "═══ OUTPUT FORMAT ═══", 632 "JSON array. 
Each element:", 633 '{"name": "<unique-kebab-name>", "description": "<one sentence>", "spawn_config": {<full spawn_config>}}', 634 "", 635 "═══ USER REQUEST ═══", 636 task, 637 ] 638 prompt = "\n".join(prompt_parts) 639 640 try: 641 response, _ = await self.llm.complete( 642 messages=[{"role": "user", "content": prompt}], 643 system="You are a JSON-only pipeline architect. Output only a valid JSON array. No markdown, no explanation.", 644 max_tokens=4000, 645 ) 646 clean = response.strip() 647 if clean.startswith("```"): 648 clean = "\n".join(clean.split("\n")[1:]) 649 if "```" in clean: 650 clean = clean[:clean.rfind("```")] 651 start = clean.find("[") 652 end = clean.rfind("]") 653 if start != -1 and end != -1: 654 clean = clean[start:end + 1] 655 plan = json.loads(clean.strip()) 656 if isinstance(plan, list): 657 # Validate generated code — catch common LLM mistakes 658 plan = self._validate_pipeline_code(plan) 659 logger.info(f"[{self.name}] Pipeline plan: {len(plan)} step(s)") 660 for i, step in enumerate(plan): 661 sc = step.get("spawn_config", {}) 662 logger.info( 663 f"[{self.name}] step {i + 1}: name={step.get('name')} " 664 f"type={sc.get('type')} topics={sc.get('mqtt_topics', [])}" 665 ) 666 return plan 667 except Exception as e: 668 logger.error(f"[{self.name}] Pipeline decomposition error: {e}") 669 return [] 670 671 # ── Pipeline code validator ──────────────────────────────────────────── 672 673 def _validate_pipeline_code(self, plan: list[dict]) -> list[dict]: 674 """ 675 Scan generated dynamic agent code for common LLM mistakes and fix them. 676 Currently catches: 677 - Raw aiomqtt.Client() usage (should use agent.subscribe() instead) 678 - Hardcoded MQTT broker hostnames 679 Logs warnings so the user knows what was fixed. 
680 """ 681 import re as _re 682 for step in plan: 683 sc = step.get("spawn_config", {}) 684 if sc.get("type") != "dynamic": 685 continue 686 code = sc.get("code", "") 687 if not code: 688 continue 689 690 issues = [] 691 692 # Detect raw aiomqtt.Client() — LLM should use agent.subscribe() 693 if "aiomqtt.Client(" in code or "aiomqtt.connect(" in code: 694 issues.append("raw aiomqtt.Client() — should use agent.subscribe()") 695 # Attempt to rewrite: extract topic and replace entire aiomqtt block 696 # with agent.subscribe() pattern 697 topics = _re.findall(r'await\s+client\.subscribe\(["\']([^"\']+)["\']', code) 698 if topics: 699 topic = topics[0] 700 # Build replacement code using agent.subscribe() 701 fixed = self._rewrite_aiomqtt_to_subscribe(code, topic) 702 if fixed: 703 sc["code"] = fixed 704 code = fixed 705 logger.info(f"[{self.name}] Auto-fixed raw aiomqtt in '{step.get('name')}' → agent.subscribe('{topic}')") 706 707 if issues: 708 logger.warning( 709 f"[{self.name}] Code issues in '{step.get('name')}': {'; '.join(issues)}" 710 ) 711 712 return plan 713 714 @staticmethod 715 def _rewrite_aiomqtt_to_subscribe(code: str, topic: str) -> str: 716 """ 717 Best-effort rewrite of raw aiomqtt MQTT subscription code to use agent.subscribe(). 718 Extracts the message handling callback and rewires it. 719 Returns empty string if rewrite fails (original code kept). 720 """ 721 import re as _re 722 723 # Try to extract the callback body — look for the inner async for loop body 724 # Pattern: async for msg/message in client.messages: ... payload handling ... 
725 match = _re.search( 726 r'async\s+for\s+\w+\s+in\s+client\.messages:\s*\n(.*?)(?=\n\s*except|\n\s*$)', 727 code, 728 _re.DOTALL, 729 ) 730 if not match: 731 return "" 732 733 callback_body = match.group(1) 734 735 # Detect how payload is parsed — json.loads(msg.payload) or similar 736 payload_parse = "" 737 if "json.loads" in callback_body: 738 payload_parse = " # payload is already a dict (parsed by agent.subscribe)\n" 739 740 # Strip leading indentation from callback body 741 lines = callback_body.splitlines() 742 min_indent = min((len(l) - len(l.lstrip()) for l in lines if l.strip()), default=4) 743 dedented = "\n".join(" " + l[min_indent:] for l in lines if l.strip()) 744 745 # Extract any setup code before the aiomqtt block 746 pre_match = _re.split(r'async\s+with\s+aiomqtt\.Client', code)[0] 747 pre_lines = [l for l in pre_match.splitlines() 748 if l.strip() and not l.strip().startswith("import aiomqtt") 749 and not l.strip().startswith("async def setup")] 750 pre_code = "\n".join(" " + l.strip() for l in pre_lines if l.strip()) + "\n" if pre_lines else "" 751 752 rewritten = ( 753 f"async def setup(agent):\n" 754 f"{pre_code}" 755 f" async def _on_message(payload):\n" 756 f"{payload_parse}" 757 f"{dedented}\n" 758 f" agent.subscribe('{topic}', _on_message)\n" 759 f" await agent.log('Subscribed to {topic}')\n" 760 ) 761 762 # Preserve any process() or handle_task() that existed 763 import re as _re2 764 for fn in ("process", "handle_task"): 765 fn_match = _re2.search(rf'async\s+def\s+{fn}\s*\(', code) 766 if fn_match: 767 rewritten += "\n" + code[fn_match.start():] 768 break 769 770 return rewritten 771 772 # ── Plan cache ───────────────────────────────────────────────────────── 773 774 def _load_cached_plan(self, cache_key: str, workers: list[dict]) -> Optional[list]: 775 """Load a cached plan if it exists, is fresh, and all required agents are alive.""" 776 raw = self.recall(_PLAN_CACHE_KEY) or {} 777 entry = raw.get(cache_key) 778 if not entry: 779 
return None 780 781 # TTL check 782 age = time.time() - entry.get("timestamp", 0) 783 if age > _CACHE_TTL_S: 784 logger.info(f"[{self.name}] Cache expired ({age/3600:.1f}h old)") 785 return None 786 787 plan = entry.get("plan", []) 788 if not plan: 789 return None 790 791 # Validate all agents in the plan are still running 792 alive = {w["name"] for w in workers} | {"main", self.name} 793 for step in plan: 794 agent = step.get("agent", "") 795 if agent not in alive and not step.get("spawn_config"): 796 logger.info(f"[{self.name}] Cache invalid — agent '{agent}' no longer running") 797 return None 798 799 return plan 800 801 def _save_plan_cache(self, cache_key: str, task: str, plan: list): 802 """Persist the plan so future similar tasks can reuse it.""" 803 raw = self.recall(_PLAN_CACHE_KEY) or {} 804 # Evict entries older than TTL 805 now = time.time() 806 raw = {k: v for k, v in raw.items() if now - v.get("timestamp", 0) < _CACHE_TTL_S} 807 raw[cache_key] = { 808 "task": task[:200], 809 "plan": plan, 810 "timestamp": now, 811 } 812 self.persist(_PLAN_CACHE_KEY, raw) 813 814 # ── Worker discovery ─────────────────────────────────────────────────── 815 816 def _discover_workers(self) -> list[dict]: 817 if not self._registry: 818 return [] 819 # Pull full manifests from main's capability registry (includes schemas) 820 main = self._registry.find_by_name("main") 821 manifest_map: dict = {} 822 if main and hasattr(main, "list_capabilities"): 823 for cap in main.list_capabilities(): 824 manifest_map[cap["name"]] = cap 825 826 workers = [] 827 for actor in self._registry.all_actors(): 828 if actor.name in _SKIP_AGENTS or actor.name == self.name: 829 continue 830 # Prefer manifest data (richer), fall back to live actor attrs 831 manifest = manifest_map.get(actor.name, {}) 832 workers.append({ 833 "name": actor.name, 834 "type": type(actor).__name__, 835 "description": ( 836 manifest.get("description") 837 or getattr(actor, "description", "") 838 or getattr(actor, 
"system_prompt", "")[:100] 839 or type(actor).__name__ 840 ), 841 "capabilities": manifest.get("capabilities", []), 842 "input_schema": manifest.get("input_schema", {}), 843 "output_schema": manifest.get("output_schema", {}), 844 }) 845 return workers 846 847 # ── Decomposition ────────────────────────────────────────────────────── 848 849 async def _decompose(self, task: str, workers: list[dict]) -> list[dict]: 850 """LLM breaks task into steps. Can declare missing agents with spawn configs.""" 851 if not self.llm: 852 return [] 853 854 def _fmt_worker(w: dict) -> str: 855 lines = [f" - {w['name']} ({w['type']}): {w['description']}"] 856 if w.get("capabilities"): 857 lines.append(f" capabilities: {', '.join(w['capabilities'])}") 858 if w.get("input_schema"): 859 lines.append(f" input_schema : {w['input_schema']}") 860 if w.get("output_schema"): 861 lines.append(f" output_schema: {w['output_schema']}") 862 return "\n".join(lines) 863 864 workers_desc = "\n".join(_fmt_worker(w) for w in workers) 865 866 prompt = f"""You are a task planner for a multi-agent system. 867Break the task into steps. Each step is handled by one agent. 868 869AVAILABLE AGENTS (with input/output contracts): 870{workers_desc} 871 872TASK: {task} 873 874OUTPUT RULES: 875- Respond ONLY with a valid JSON array. No explanation, no markdown. 876- Each step object: 877 {{ 878 "step": <int>, 879 "agent": "<agent-name>", 880 "task": "<what to ask this agent>", 881 "parallel": <true|false>, 882 "depends_on": [<step ints>], 883 "spawn_config": <null or spawn object if agent needs to be created> 884 }} 885- "parallel": true if this step can run concurrently with other parallel steps 886- "depends_on": step numbers whose results this step needs (empty list if none) 887- "spawn_config": if the ideal agent for a step does NOT exist in the available list, 888 include a spawn config to create it. 
889 AGENT TYPE RULES: 890 Use "llm" ONLY for pure conversation/Q&A/explanation agents (no external APIs or tools). 891 Use "dynamic" for anything that fetches data, calls APIs, runs searches, or uses libraries. 892 In dynamic agent code ALWAYS use: await agent.log(msg), await agent.publish(topic, dict), agent.state dict, agent.recall(key), agent.persist(key, val). 893 NEVER use agent.logger — it does not exist. Use await agent.log(msg) instead. 894 LLM agent example: 895 {{ 896 "name": "translator-agent", 897 "type": "llm", 898 "system_prompt": "You are an expert translator. Translate text accurately." 899 }} 900 Dynamic agent example (for weather, news, search, APIs): 901 {{ 902 "name": "weather-agent", 903 "type": "dynamic", 904 "description": "Fetches live weather data for a city", 905 "input_schema": {{"city": "str — city name to fetch weather for"}}, 906 "output_schema": {{"city": "str", "temp_c": "str", "description": "str"}}, 907 "poll_interval": 3600, 908 "code": "async def setup(agent):\n await agent.log('ready')\nasync def process(agent):\n import asyncio\n await asyncio.sleep(3600)\nasync def handle_task(agent, payload):\n import httpx\n city = payload.get('city', 'Athens')\n async with httpx.AsyncClient(timeout=10) as c:\n r = await c.get(f'https://wttr.in/{{city}}?format=j1')\n d = r.json()\n cur = d['current_condition'][0]\n return {{'city': city, 'temp_c': cur['temp_C'], 'description': cur['weatherDesc'][0]['value']}}" 909 }} 910- The FINAL synthesis step should ALWAYS be assigned to "main" (not any other agent). 911 Main will combine results using its LLM. Never assign synthesis to a domain agent. 912- Only create new agents when TRULY necessary — prefer existing agents. 913- If one agent can handle everything, output a single-step plan. 914- Keep it minimal — avoid unnecessary steps. 915- IMPORTANT: For any step that combines, summarizes, synthesizes or compares results 916 from other steps, ALWAYS use "agent": "main" — never a domain agent. 
917- Domain agents (weather, news, manual, etc.) are for DATA RETRIEVAL only. 918 "main" handles all reasoning, summarization and synthesis. 919 920Example: 921[ 922 {{"step": 1, "agent": "weather-agent", "task": "Get weather in Athens", "parallel": true, "depends_on": [], "spawn_config": null}}, 923 {{"step": 2, "agent": "news-agent", "task": "Get AI news today", "parallel": true, "depends_on": [], "spawn_config": null}}, 924 {{"step": 3, "agent": "main", "task": "Summarize the weather and news results", "parallel": false, "depends_on": [1, 2], "spawn_config": null}} 925]""" 926 927 try: 928 response, _ = await self.llm.complete( 929 messages=[{"role": "user", "content": prompt}], 930 system="You are a JSON-only task planner. Output only valid JSON arrays, nothing else.", 931 max_tokens=1500, 932 ) 933 clean = response.strip() 934 # Strip markdown fences 935 if clean.startswith("```"): 936 clean = "\n".join(clean.split("\n")[1:]) 937 if clean.endswith("```"): 938 clean = "\n".join(clean.split("\n")[:-1]) 939 plan = json.loads(clean.strip()) 940 if isinstance(plan, list) and plan: 941 return plan 942 except Exception as e: 943 logger.error(f"[{self.name}] Decomposition error: {e}") 944 return [] 945 946 # ── Missing agent spawning ───────────────────────────────────────────── 947 948 async def _ensure_agents(self, plan: list[dict]) -> list[dict]: 949 """ 950 For any step with a spawn_config, spawn the agent if it's not running. 951 Updates the plan with the actual agent name once spawned. 
952 """ 953 if not self._registry: 954 return plan 955 956 for step in plan: 957 spawn_config = step.get("spawn_config") 958 if not spawn_config: 959 continue 960 961 agent_name = spawn_config.get("name") or step.get("agent") 962 existing = self._registry.find_by_name(agent_name) 963 964 if existing: 965 await self._log(f"Agent '{agent_name}' already running — skipping spawn") 966 step["agent"] = agent_name 967 continue 968 969 await self._log(f"Spawning missing agent: '{agent_name}'") 970 try: 971 actor = await self._spawn_agent(spawn_config) 972 if actor: 973 step["agent"] = agent_name 974 self._spawned_by_planner.append(agent_name) 975 # Brief pause to let agent initialise 976 await asyncio.sleep(1.0) 977 await self._log(f"'{agent_name}' ready.") 978 else: 979 await self._log(f"Failed to spawn '{agent_name}' — step will use main as fallback") 980 step["agent"] = "main" 981 except Exception as e: 982 logger.error(f"[{self.name}] Spawn of '{agent_name}' failed: {e}") 983 step["agent"] = "main" 984 985 return plan 986 987 async def _spawn_agent(self, config: dict) -> Optional[Actor]: 988 """Spawn an agent from a config dict — same logic as MainActor._spawn_from_config.""" 989 agent_type = config.get("type", "dynamic") 990 name = config.get("name", "spawned-agent") 991 992 if agent_type == "ha_actuator": 993 from .home_assistant_actuator_agent import ( 994 HomeAssistantActuatorAgent, ActuatorConfig, 995 ActuatorAction, ActuatorCondition, 996 ) 997 # Ensure automation_id is unique — append short hash if needed 998 automation_id = config.get("automation_id", name) 999 if self._registry and self._registry.find_by_name(f"actuator-{automation_id[:20]}"): 1000 import hashlib 1001 suffix = hashlib.md5(f"{automation_id}{time.time()}".encode()).hexdigest()[:4] 1002 automation_id = f"{automation_id}-{suffix}" 1003 name = f"actuator-{automation_id[:20]}" 1004 actuator_config = ActuatorConfig( 1005 automation_id = automation_id, 1006 description = config.get("description", ""), 
1007 mqtt_topics = config.get("mqtt_topics", []), 1008 actions = [ActuatorAction.from_dict(a) for a in config.get("actions", [])], 1009 conditions = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])], 1010 detection_filter = config.get("detection_filter"), 1011 cooldown_seconds = float(config.get("cooldown_seconds", 10.0)), 1012 ) 1013 actor = await self.spawn( 1014 HomeAssistantActuatorAgent, 1015 config=actuator_config, 1016 name=name, 1017 persistence_dir=str(self._persistence_dir.parent), 1018 ) 1019 await self._register_with_main(config) 1020 return actor 1021 1022 if agent_type == "llm": 1023 from .llm_agent import LLMAgent 1024 actor = await self.spawn( 1025 LLMAgent, 1026 name=name, 1027 llm_provider=self.llm, 1028 system_prompt=config.get("system_prompt", "You are a helpful assistant."), 1029 persistence_dir=str(self._persistence_dir.parent), 1030 ) 1031 # Save to main's spawn registry so it persists across restarts 1032 await self._register_with_main(config) 1033 return actor 1034 1035 if agent_type == "dynamic": 1036 code = config.get("code", "").strip() 1037 if not code: 1038 logger.warning(f"[{self.name}] Dynamic spawn config has no code for '{name}'") 1039 return None 1040 from .dynamic_agent import DynamicAgent 1041 actor = await self.spawn( 1042 DynamicAgent, 1043 name=name, 1044 code=code, 1045 poll_interval=float(config.get("poll_interval") or 1.0), 1046 description=config.get("description", ""), 1047 input_schema=config.get("input_schema", {}), 1048 output_schema=config.get("output_schema", {}), 1049 llm_provider=self.llm, 1050 persistence_dir=str(self._persistence_dir.parent), 1051 ) 1052 await self._register_with_main(config) 1053 return actor 1054 1055 if agent_type == "manual": 1056 from .manual_agent import ManualAgent 1057 actor = await self.spawn( 1058 ManualAgent, 1059 name=name, 1060 llm_provider=self.llm, 1061 persistence_dir=str(self._persistence_dir.parent), 1062 ) 1063 await self._register_with_main(config) 1064 
return actor 1065 1066 logger.warning(f"[{self.name}] Unknown agent type: '{agent_type}'") 1067 return None 1068 1069 async def _register_with_main(self, config: dict): 1070 """Tell main to add this agent to its spawn registry so it survives restarts.""" 1071 if not self._registry: 1072 return 1073 main = self._registry.find_by_name("main") 1074 if main and hasattr(main, "_save_to_spawn_registry"): 1075 main._save_to_spawn_registry(config) 1076 logger.info(f"[{self.name}] Registered '{config.get('name')}' with main's spawn registry") 1077 1078 # ── Execution ────────────────────────────────────────────────────────── 1079 1080 async def _execute(self, plan: list[dict]) -> dict: 1081 results: dict = {} 1082 completed: set[int] = set() 1083 remaining: list[dict] = list(plan) 1084 1085 while remaining: 1086 ready = [ 1087 s for s in remaining 1088 if all(d in completed for d in (s.get("depends_on") or [])) 1089 ] 1090 if not ready: 1091 logger.error(f"[{self.name}] Plan deadlock — aborting remaining steps") 1092 break 1093 1094 parallel = [s for s in ready if s.get("parallel", False)] 1095 sequential = [s for s in ready if not s.get("parallel", False)] 1096 1097 if parallel: 1098 await self._log(f"Parallel: steps {[s['step'] for s in parallel]}") 1099 outputs = await asyncio.gather( 1100 *[self._execute_step(s, results) for s in parallel], 1101 return_exceptions=True, 1102 ) 1103 for step, out in zip(parallel, outputs): 1104 results[step["step"]] = out if not isinstance(out, Exception) else {"error": str(out)} 1105 completed.add(step["step"]) 1106 remaining.remove(step) 1107 1108 for step in sequential: 1109 await self._log(f"Sequential: step {step['step']} → @{step['agent']}") 1110 results[step["step"]] = await self._execute_step(step, results) 1111 completed.add(step["step"]) 1112 remaining.remove(step) 1113 1114 return results 1115 1116 async def _execute_step(self, step: dict, prior: dict) -> dict: 1117 agent_name = step.get("agent", "main") 1118 task_text = 
step.get("task", "") 1119 depends_on = step.get("depends_on") or [] 1120 1121 # Inject context from prior steps 1122 if depends_on: 1123 ctx = [] 1124 for dep in depends_on: 1125 r = prior.get(dep, {}) 1126 t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:600] 1127 ctx.append(f"[Step {dep} result]: {t}") 1128 if ctx: 1129 task_text += "\n\nContext from previous steps:\n" + "\n".join(ctx) 1130 1131 if agent_name in ("main", self.name): 1132 return {"result": await self._llm_answer(task_text)} 1133 1134 await self._log(f" → @{agent_name}: {task_text[:60]}") 1135 result = await self._delegate(agent_name, task_text) 1136 if not result: 1137 return {"error": f"No response from {agent_name}"} 1138 # If agent reported an error, check if we can replan around it 1139 if "error" in result and "error_phase" in result: 1140 await self._log( 1141 f" ⚠ @{agent_name} failed ({result['error_phase']}): {result['error'][:80]}" 1142 ) 1143 # Try main as fallback synthesizer 1144 await self._log(f" → falling back to @main for this step") 1145 fallback = await self._llm_answer( 1146 f"The agent '{agent_name}' failed. 
Do your best to answer: {task_text}" 1147 ) 1148 return {"result": fallback, "fallback": True, "original_error": result["error"]} 1149 return result 1150 1151 # ── Delegation ───────────────────────────────────────────────────────── 1152 1153 async def _delegate(self, agent_name: str, task: str, timeout: float = 60.0) -> Optional[dict]: 1154 return await self._delegate_with_payload(agent_name, {"text": task}, timeout=timeout) 1155 1156 async def _delegate_with_payload(self, agent_name: str, payload: dict, timeout: float = 60.0) -> Optional[dict]: 1157 if not self._registry: 1158 return None 1159 target = self._registry.find_by_name(agent_name) 1160 if not target: 1161 logger.warning(f"[{self.name}] Agent '{agent_name}' not found for delegation") 1162 return {"error": f"Agent '{agent_name}' not found"} 1163 1164 import uuid 1165 task_id = str(uuid.uuid4())[:8] 1166 future: asyncio.Future = asyncio.get_running_loop().create_future() 1167 self._result_futures[task_id] = future 1168 1169 await self.send(target.actor_id, MessageType.TASK, { 1170 **payload, "_task_id": task_id, "_reply_to": self.actor_id 1171 }) 1172 try: 1173 return await asyncio.wait_for(future, timeout=timeout) 1174 except asyncio.TimeoutError: 1175 logger.warning(f"[{self.name}] Timeout from '{agent_name}'") 1176 return {"error": f"Timeout from {agent_name}"} 1177 finally: 1178 self._result_futures.pop(task_id, None) 1179 1180 # ── Synthesis ────────────────────────────────────────────────────────── 1181 1182 async def _synthesize(self, task: str, plan: list[dict], results: dict) -> str: 1183 if not self.llm: 1184 parts = [] 1185 for s in plan: 1186 r = results.get(s["step"], {}) 1187 t = r.get("result") or r.get("text") or r.get("answer") or str(r) 1188 parts.append(f"[@{s['agent']}]: {t}") 1189 return "\n\n".join(parts) 1190 1191 results_text = [] 1192 for s in plan: 1193 r = results.get(s["step"], {}) 1194 t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:800] 1195 
results_text.append(f"Step {s['step']} (@{s['agent']}): {t}") 1196 1197 prompt = ( 1198 f"You collected results from multiple agents for this task:\n\n" 1199 f"ORIGINAL TASK: {task}\n\n" 1200 f"RESULTS:\n" + "\n\n".join(results_text) + 1201 "\n\nSynthesize into a single, clear, well-structured answer for the user. " 1202 "Do not mention agent names, step numbers, or internal system details." 1203 ) 1204 try: 1205 response, _ = await self.llm.complete( 1206 messages=[{"role": "user", "content": prompt}], 1207 system="You synthesize multi-agent results into clean, user-facing answers.", 1208 max_tokens=2048, 1209 ) 1210 return response 1211 except Exception as e: 1212 logger.error(f"[{self.name}] Synthesis failed: {e}") 1213 return "\n\n".join(results_text) 1214 1215 async def _llm_answer(self, task: str) -> str: 1216 if not self.llm: 1217 return f"[No LLM available: {task}]" 1218 try: 1219 response, _ = await self.llm.complete( 1220 messages=[{"role": "user", "content": task}], 1221 system="You are a helpful assistant.", 1222 max_tokens=2048, 1223 ) 1224 return response 1225 except Exception as e: 1226 return f"[LLM error: {e}]" 1227 1228 # ── Helpers ──────────────────────────────────────────────────────────── 1229 1230 async def _deferred_stop(self): 1231 await asyncio.sleep(2.0) 1232 await self._log("Self-terminating.") 1233 if self._registry: 1234 await self._registry.unregister(self.actor_id) 1235 await self.stop() 1236 1237 async def _log(self, msg: str): 1238 logger.info(f"[{self.name}] {msg}") 1239 await self._mqtt_publish( 1240 f"agents/{self.actor_id}/logs", 1241 {"type": "log", "message": msg, "timestamp": time.time()}, 1242 )
On-demand orchestrator. Spawned per complex task, self-terminates when done.
def __init__(
    self,
    llm_provider: Optional[LLMProvider] = None,
    task: str = "",
    reply_to_id: str = "",
    reply_task_id: str = "",
    auto_terminate: bool = True,
    **kwargs,
):
    """
    Create a planner actor.

    Args:
        llm_provider: LLM backend stored on ``self.llm``; may be None.
        task: Task text stored on the instance for later use.
        reply_to_id: Actor id stored as the default reply destination.
        reply_task_id: Task id stored so replies can carry it back.
        auto_terminate: Flag stored on the instance as ``_auto_terminate``.
        **kwargs: Forwarded unchanged to the base-class constructor; a
            ``name`` of ``"planner"`` is supplied when the caller gives none.
    """
    # Only fill in the actor name when the caller did not choose one.
    if "name" not in kwargs:
        kwargs["name"] = "planner"
    super().__init__(**kwargs)

    # Per-run bookkeeping
    self._spawned_by_planner: list[str] = []  # agents we created this run
    self._result_futures: dict[str, asyncio.Future] = {}

    # Constructor arguments, kept verbatim
    self._auto_terminate = auto_terminate
    self._reply_task_id = reply_task_id
    self._reply_to_id = reply_to_id
    self._task = task
    self.llm = llm_provider
async def on_start(self):
    """
    Log readiness and, if a task was supplied at construction, start
    planning it in the background.

    The planning coroutine runs as its own asyncio task so this method
    returns promptly.  A strong reference to that task is kept on the
    instance until it finishes: the event loop only holds weak references
    to tasks, so an unreferenced task can be garbage-collected mid-run.
    """
    await self._log(f"Planner ready. Task: {self._task[:80]}")
    if not self._task:
        return

    # Created lazily so this method does not depend on __init__ defining it.
    pending = getattr(self, "_planner_bg_tasks", None)
    if pending is None:
        pending = self._planner_bg_tasks = set()

    runner = asyncio.create_task(self._report_plan(self._task))
    pending.add(runner)
    # Drop the reference as soon as the task completes (success or failure).
    runner.add_done_callback(pending.discard)
Called when actor starts. Override for init logic.
async def handle_message(self, msg: Message):
    """
    React to TASK and RESULT messages; every other type is ignored.

    RESULT: if the payload is a dict whose ``_task_id`` matches a pending
    entry in ``self._result_futures``, that future is resolved with the
    payload (unless it is already done).

    TASK: the task text is planned via ``self._run_plan`` and, when a
    reply destination is known, the outcome is sent back as a RESULT.
    """
    if msg.type == MessageType.RESULT:
        body = msg.payload if isinstance(msg.payload, dict) else {}
        key = body.get("_task_id")
        if key and key in self._result_futures:
            waiter = self._result_futures[key]
            if not waiter.done():
                waiter.set_result(body)
        return

    if msg.type != MessageType.TASK:
        return

    if isinstance(msg.payload, dict):
        body = msg.payload
    else:
        body = {"text": str(msg.payload)}
    text = body.get("text") or body.get("task") or str(msg.payload)
    # Destination precedence: payload field, message reply_to, sender,
    # then whatever was configured earlier.
    self._reply_to_id = (
        body.get("_reply_to") or msg.reply_to or msg.sender_id or self._reply_to_id
    )
    incoming_id = body.get("_task_id")

    await self._log(f"Received task: {text[:80]}")
    outcome = await self._run_plan(text)

    if not self._reply_to_id:
        return

    answer = {"result": outcome, "text": outcome}
    # Prefer the task id given at construction (so the initiator's future
    # resolves); otherwise fall back to the id carried by this message.
    correlation = self._reply_task_id or incoming_id
    if correlation:
        answer["_task_id"] = correlation
    if self._spawned_by_planner:
        answer["spawned"] = self._spawned_by_planner
    await self.send(self._reply_to_id, MessageType.RESULT, answer)
Handle messages not caught by default handlers.
class DynamicAgent(Actor):
    """
    Generic actor shell whose behaviour is supplied as Python source text.

    The source (typically LLM-written) may define any of ``setup(agent)``,
    ``process(agent)``, ``handle_task(agent, payload)`` and
    ``cleanup(agent)``.  This class sanitizes and compiles that source,
    then drives the resulting functions, passing them ``self._api`` as the
    ``agent`` argument.
    """

    # Max times on_start will ask the LLM to fix a compile error before giving up
    _MAX_COMPILE_RETRIES = 2

    def __init__(
        self,
        code: str,                            # LLM-generated Python source
        poll_interval: float = 1.0,           # seconds between process() calls
        description: str = "",                # what this agent does
        input_schema: Optional[dict] = None,  # expected task payload fields
        output_schema: Optional[dict] = None, # returned result fields
        llm_provider=None,                    # optional LLM provider
        **kwargs,
    ):
        """
        Store the source and configuration; nothing is compiled here.

        ``input_schema`` / ``output_schema`` default to empty dicts when
        omitted.  Remaining keyword arguments go to the base constructor.
        """
        super().__init__(**kwargs)
        self._code = code
        self.poll_interval = poll_interval
        self.description = description
        self.input_schema = input_schema or {}
        self.output_schema = output_schema or {}
        self._llm_provider = llm_provider

        # Compiled functions — populated by _compile_code()
        self._fn_setup = None
        self._fn_process = None
        self._fn_handle_task = None

        # Namespace the generated code is exec'd into (shared across calls)
        self._ns: dict = {}

        # Cost tracking
        self.total_input_tokens = 0
        self.total_output_tokens = 0
        self.total_cost_usd = 0.0

        # Error tracking for health classification
        self._consecutive_errors: int = 0
        self._error_threshold: int = 3        # reported as degraded at/after this many
        self._last_error_time: float = 0.0
        self._error_phase: str = ""           # compile|setup|process|handle_task

        # Public API exposed to generated code via the `agent` parameter
        self._api = _AgentAPI(self)

    # ── Lifecycle ──────────────────────────────────────────────────────────

    async def on_start(self):
        """
        Compile the stored source (with LLM-assisted retries), then launch
        ``setup()`` / ``process()`` as background tasks and publish the
        agent manifest.  A permanent compile failure is published as a
        fatal error and nothing is launched.
        """
        # ── Compile with LLM self-correction on compile errors ────────────
        current_code = self._code
        error_msg = self._compile_code(current_code)

        if error_msg:
            for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
                logger.warning(
                    f"[{self.name}] Compile error (attempt {attempt}): {error_msg}"
                )
                fixed = await self._fix_syntax_with_llm(current_code, error_msg)
                if fixed is None:
                    # LLM unavailable — no point retrying
                    break
                self._ns = {}  # fresh namespace for retry
                new_err = self._compile_code(fixed)
                if new_err is None:
                    # Fix worked — update stored code so restarts use the good version
                    self._code = fixed
                    error_msg = None
                    logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
                    await self._mqtt_publish(
                        f"agents/{self.actor_id}/logs",
                        {"type": "log",
                         "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
                         "timestamp": time.time()},
                    )
                    break
                # The proposed fix still fails to compile — feed it and its
                # new error back in for the next attempt.
                current_code = fixed
                error_msg = new_err

        if error_msg:
            # All attempts exhausted — publish fatal and stop
            err_exc = SyntaxError(error_msg)
            logger.error(f"[{self.name}] Code compilation failed permanently: {error_msg}")
            await self._publish_error(phase="compile", error=err_exc,
                                      traceback_str=error_msg, fatal=True)
            return

        # ── setup() ───────────────────────────────────────────────────────
        if self._fn_setup:
            # Run setup as a background task so a long-running setup() does
            # not block on_start(); _run_setup starts process() afterwards.
            self._tasks.append(asyncio.create_task(self._run_setup()))
        elif self._fn_process:
            self._tasks.append(asyncio.create_task(self._process_loop()))

        # Publish manifest immediately, regardless of what the code defines.
        await self._api._publish_manifest()

    async def on_stop(self):
        """Give generated code a chance to clean up via its ``cleanup(agent)``."""
        cleanup = self._ns.get("cleanup")
        if not cleanup:
            return
        try:
            await cleanup(self._api)
        except Exception as e:
            # Best-effort: a failing cleanup must not block shutdown, but it
            # should be visible in the logs rather than vanish.
            logger.warning(f"[{self.name}] cleanup() failed: {e}")

    # ── Code compilation ───────────────────────────────────────────────────

    @staticmethod
    def _sanitize_code(code: str) -> str:
        """
        Block-aware sanitizer. Removes LLM self-setup patterns entirely:
          - try/except blocks containing LLM imports
          - if/else blocks checking api_key or llm_backend
          - orphan else:/elif: that follow sanitized blocks
          - call_llm/call_openai/call_ollama functions -> agent.llm shim
          - standalone bad lines

        Works line by line on indentation; returns the rewritten source.
        """
        import re

        # Compiled once per call instead of once per inspected line.
        llm_patterns = [re.compile(p) for p in (
            r"\bimport\s+(openai|anthropic|ollama|langchain)\b",
            r"\bfrom\s+(openai|anthropic|ollama|langchain)\b",
            r"\b(OPENAI_API_KEY|ANTHROPIC_API_KEY)\b",
            r"os\.environ.*API_KEY",
            r"\b(openai|anthropic|ollama)\.(OpenAI|Anthropic|Client|AsyncOpenAI|AsyncAnthropic)\b",
            # api_key as a variable assignment (not as a dict key like 'api_key': ...)
            r"^\s*api_key\s*=",
            # llm_backend as a variable assignment only
            r"^\s*agent\.state\[.llm_backend.\]\s*=",
        )]
        branch_re = re.compile(r"\s*(if|elif)\b")
        orphan_re = re.compile(r"\s*(else\s*:|elif\b)")
        wrapper_re = re.compile(
            r"(\s*)(async\s+)?def\s+"
            r"(call_llm|call_openai|call_ollama|call_anthropic|call_gpt|"
            r"get_llm|setup_llm|create_llm|query_llm|ask_llm|llm_call)\s*\("
        )

        def line_is_bad(line):
            return any(p.search(line) for p in llm_patterns)

        def collect_block(lines, start, base_indent, conts=("except", "else", "finally", "elif")):
            # Gather lines from `start` that belong to the block opened at
            # `base_indent`: deeper-indented lines, blank lines, and
            # same-level continuation keywords listed in `conts`.
            j, block = start, []
            # With no continuation keywords, use a pattern that never matches.
            pat = re.compile(r"\s*(" + "|".join(conts) + r")\b" if conts else r"(?!x)x")
            while j < len(lines):
                bl = lines[j]
                # Blank lines are treated as inside the block.
                bl_ind = len(bl) - len(bl.lstrip()) if bl.strip() else base_indent + 4
                if bl.strip() and bl_ind <= base_indent and not pat.match(bl):
                    break
                block.append(bl)
                j += 1
            return block, j

        lines = code.split("\n")
        result = []
        i = 0
        last_sanitized = False

        while i < len(lines):
            line = lines[i]
            stripped = line.strip()
            indent = len(line) - len(line.lstrip()) if stripped else 0
            prefix = " " * indent

            if not stripped:
                result.append(line)
                last_sanitized = False
                i += 1
                continue

            # try: blocks — nuke entirely if they touch LLM
            if stripped == "try:":
                block, j = collect_block(lines, i + 1, indent)
                full = [line] + block
                if any(line_is_bad(l) for l in full):
                    result.append(prefix + "pass # sanitized: LLM setup block")
                    last_sanitized = True
                else:
                    result.extend(full)
                    last_sanitized = False
                i = j
                continue

            # if/elif whose condition references LLM vars — nuke whole branch
            if branch_re.match(line) and line_is_bad(line):
                _, j = collect_block(lines, i + 1, indent, ("elif", "else"))
                result.append(prefix + "pass # sanitized: LLM conditional")
                last_sanitized = True
                i = j
                continue

            # orphan else:/elif: after a sanitized block — drop silently
            if orphan_re.match(line) and last_sanitized:
                _, j = collect_block(lines, i + 1, indent, ())
                i = j
                continue

            # LLM wrapper functions — replace with agent.llm shim
            fn_m = wrapper_re.match(line)
            if fn_m:
                _, j = collect_block(lines, i + 1, len(fn_m.group(1)), ())
                p, fname = fn_m.group(1), fn_m.group(3)
                result += [
                    p + "async def " + fname + "(agent, messages, system='', **kw):",
                    p + " # sanitized: rewired to agent.llm",
                    p + " sys_p = system or next((m.get('content','') for m in messages if m.get('role')=='system'), '')",
                    p + " msgs = [m for m in messages if m.get('role') != 'system']",
                    p + " return await agent.llm.complete(messages=msgs, system=sys_p)",
                ]
                last_sanitized = False
                i = j
                continue

            # standalone bad lines
            if line_is_bad(line):
                result.append(prefix + "pass # sanitized: " + stripped[:60])
                last_sanitized = True
                i += 1
                continue

            last_sanitized = False
            result.append(line)
            i += 1

        return "\n".join(result)

    def _compile_code(self, code: Optional[str] = None) -> Optional[str]:
        """
        Sanitize then compile LLM-generated code into self._ns.

        Args:
            code: Source to compile; defaults to the stored ``self._code``.

        Returns:
            None on success, otherwise ``"<ExceptionType>: <message>"``.
            Callers use the error string to ask the LLM to fix the code and
            retry (see on_start / _fix_syntax_with_llm).
        """
        source = code if code is not None else self._code
        clean = self._sanitize_code(source)

        # Pre-inject the LLM shim so generated code can call these names
        def _get_llm_shim(*args, **kwargs):
            return self._api.llm
        self._ns["get_llm"] = _get_llm_shim
        self._ns["setup_llm"] = _get_llm_shim
        self._ns["create_llm"] = _get_llm_shim

        try:
            # SECURITY NOTE(review): this executes generated source in-process.
            # _sanitize_code only strips LLM-setup patterns; it is not a sandbox.
            exec(compile(clean, f"<{self.name}>", "exec"), self._ns)
            self._fn_setup = self._ns.get("setup")
            self._fn_process = self._ns.get("process")
            self._fn_handle_task = self._ns.get("handle_task")
            fns = [f for f in ["setup", "process", "handle_task", "cleanup"] if f in self._ns]
            logger.info(f"[{self.name}] Code compiled OK. Functions: {fns}")
            if not fns:
                logger.warning(f"[{self.name}] No functions found in compiled code.")
            return None  # success
        except Exception as e:
            # Covers both compile-time errors and errors raised while the
            # module-level statements of the generated code run.
            return f"{type(e).__name__}: {e}"

    async def _fix_syntax_with_llm(self, bad_code: str, error_msg: str) -> Optional[str]:
        """
        Ask the configured LLM to fix a syntax error in agent code.

        Returns the (possibly still-broken) code string from the LLM, or None
        only if the LLM is unavailable (no provider, or the call raised).
        The caller is responsible for verifying the fix with _compile_code().
        """
        if self._llm_provider is None:
            return None

        prompt = (
            "The following Python code has a syntax error.\n"
            f"Error: {error_msg}\n\n"
            "Fix ONLY the syntax error. Do not change logic or add features.\n"
            "Return ONLY the corrected Python code — no explanations, "
            "no markdown fences, no commentary.\n\n"
            f"```python\n{bad_code}\n```"
        )
        logger.info(f"[{self.name}] Asking LLM to fix syntax error: {error_msg[:120]}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log",
             "message": f"Syntax error — asking LLM to fix: {error_msg[:120]}",
             "timestamp": time.time()},
        )
        try:
            response, usage = await self._llm_provider.complete(
                messages=[{"role": "user", "content": prompt}],
                system="You are a Python syntax expert. Return only valid Python code.",
                max_tokens=4096,
            )
            # Track cost.  A provider that reports no usage must not cause
            # an otherwise usable answer to be thrown away.
            usage = usage or {}
            self.total_input_tokens += usage.get("input_tokens", 0)
            self.total_output_tokens += usage.get("output_tokens", 0)
            self.total_cost_usd += usage.get("cost_usd", 0.0)

            # Strip markdown fences the LLM may add despite instructions
            fixed = response.strip()
            if fixed.startswith("```"):
                fixed = "\n".join(
                    l for l in fixed.split("\n")
                    if not l.strip().startswith("```")
                ).strip()

            return fixed  # caller validates with _compile_code()

        except Exception as e:
            logger.warning(f"[{self.name}] LLM fix call failed: {e}")
            return None

    # ── Setup wrapper ───────────────────────────────────────────────────────

    async def _run_setup(self):
        """
        Run setup() as a background task.
        - Errors in setup() are published as fatal errors.
        - If process() is also defined, it is started AFTER setup() returns.
          If setup() never returns, process() is simply never started.
        """
        try:
            await self._fn_setup(self._api)
            logger.info(f"[{self.name}] setup() completed.")
        except asyncio.CancelledError:
            return
        except Exception as e:
            err = traceback.format_exc()
            logger.error(f"[{self.name}] setup() failed: {e}\n{err}")
            await self._publish_error(phase="setup", error=e, traceback_str=err, fatal=True)
            return
        # setup() returned cleanly — start process() loop if defined
        if self._fn_process and self.state not in (ActorState.STOPPED, ActorState.FAILED):
            self._tasks.append(asyncio.create_task(self._process_loop()))

    # ── Process loop ───────────────────────────────────────────────────────

    async def _process_loop(self):
        """
        Call the generated process() repeatedly, sleeping ``poll_interval``
        between calls, until the actor is STOPPED or FAILED.  While PAUSED
        the call is skipped.  After an error an extra exponential back-off
        (capped at 30 s) is added before the regular sleep.
        """
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            if self.state == ActorState.PAUSED:
                await asyncio.sleep(self.poll_interval)
                continue
            try:
                await self._fn_process(self._api)
                self._reset_error_count()
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.metrics.errors += 1
                tb = traceback.format_exc()
                logger.error(f"[{self.name}] process() error: {e}\n{tb}")
                # _publish_error increments _consecutive_errors, so the
                # back-off below already reflects this failure.
                await self._publish_error(phase="process", error=e, traceback_str=tb)
                backoff = min(2 ** self._consecutive_errors, 30)
                await asyncio.sleep(backoff)
            await asyncio.sleep(self.poll_interval)

    # ── Message handling ───────────────────────────────────────────────────

    @staticmethod
    def _with_task_id(request, reply):
        """
        Return ``reply`` carrying the request's ``_task_id`` when there is one.

        Only applies when both are dicts, the request has a ``_task_id`` and
        the reply does not already set one; the reply is copied, never
        mutated.  Anything else is returned unchanged.
        """
        if not isinstance(request, dict) or not isinstance(reply, dict):
            return reply
        task_id = request.get("_task_id")
        if task_id is None or "_task_id" in reply:
            return reply
        return {**reply, "_task_id": task_id}

    async def handle_message(self, msg: Message):
        """
        Handle TASK messages by running the generated handle_task().

        A non-None return value is sent back to the sender as a RESULT; a
        raised exception is published and reported to the sender as an
        error RESULT.  Replies echo the request's ``_task_id`` so callers
        that correlate on that key can match the answer to their request.
        Other message types are ignored here.
        """
        if msg.type != MessageType.TASK:
            return

        self.metrics.messages_processed += 1
        request = msg.payload or {}

        if not self._fn_handle_task:
            if msg.sender_id:
                await self.send(
                    msg.sender_id, MessageType.RESULT,
                    self._with_task_id(request, {"info": f"{self.name} has no handle_task defined"}),
                )
            return

        try:
            result = await self._fn_handle_task(self._api, request)
            if msg.sender_id and result is not None:
                await self.send(msg.sender_id, MessageType.RESULT,
                                self._with_task_id(request, result))
        except Exception as e:
            tb = traceback.format_exc()
            logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
            await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
            if msg.sender_id:
                await self.send(msg.sender_id, MessageType.RESULT, self._with_task_id(request, {
                    "error": str(e),
                    "error_phase": "handle_task",
                    "agent": self.name,
                }))

    async def _publish_error(
        self,
        phase: str,
        error: Exception,
        traceback_str: str = "",
        fatal: bool = False,
    ):
        """
        Record an error and broadcast it.

        Increments the consecutive-error counter, then publishes a
        structured event to ``agents/{id}/errors``, sends the same event as
        a TASK to the actor named "monitor" (if registered and not this
        actor), and mirrors a short alert to ``agents/{id}/alert``.

        Args:
            phase: Where the error happened (compile|setup|process|handle_task).
            error: The exception; its ``str()`` goes into the event.
            traceback_str: Optional traceback text; only the last 1200
                characters are included.
            fatal: Forces severity "critical" regardless of the counter.
        """
        self._consecutive_errors += 1
        self._last_error_time = time.time()
        self._error_phase = phase
        degraded = self._consecutive_errors >= self._error_threshold
        severity = "critical" if fatal or degraded else "warning"
        event = {
            "actor_id": self.actor_id,
            "name": self.name,
            "phase": phase,
            "error": str(error),
            "traceback": traceback_str[-1200:] if traceback_str else "",
            "consecutive": self._consecutive_errors,
            "fatal": fatal,
            "severity": severity,
            "degraded": degraded,
            "timestamp": time.time(),
        }
        await self._mqtt_publish(f"agents/{self.actor_id}/errors", event)
        # Direct actor message to monitor, independent of the MQTT publish
        if self._registry:
            monitor = self._registry.find_by_name("monitor")
            if monitor and monitor.actor_id != self.actor_id:
                try:
                    await self.send(monitor.actor_id, MessageType.TASK, {
                        **event,
                        "_monitor_error_event": True,
                    })
                except Exception as send_err:
                    # Best-effort notification; never let it mask the
                    # original error being reported.
                    logger.debug(f"[{self.name}] Could not notify monitor: {send_err}")
        # Mirror to /alert
        await self._mqtt_publish(f"agents/{self.actor_id}/alert", {
            "actor_id": self.actor_id,
            "name": self.name,
            "message": f"[{phase}] {error}",
            "severity": severity,
            "timestamp": time.time(),
        })

    def _reset_error_count(self):
        """Clear the consecutive-error counter and phase after a success."""
        if self._consecutive_errors > 0:
            logger.info(f"[{self.name}] Recovered — resetting error counter.")
        self._consecutive_errors = 0
        self._error_phase = ""

    def get_status(self) -> dict:
        """Return the base status extended with description, code and agent type."""
        s = super().get_status()
        s["description"] = self.description
        s["code"] = self._code
        s["agent_type"] = "dynamic"
        return s

    def _build_heartbeat(self) -> dict:
        """Return the base heartbeat extended with code, description and agent type."""
        hb = super()._build_heartbeat()
        hb["code"] = self._code  # include code in every heartbeat
        hb["description"] = self.description
        hb["agent_type"] = "dynamic"
        return hb

    def _current_task_description(self) -> str:
        """Return the agent description, or a generic label when it is empty."""
        return self.description or "running dynamic code"
Generic actor shell. Core behavior is provided as Python source code strings. The LLM writes setup/process/handle_task functions; this class runs them.
def __init__(
    self,
    code: str,                     # Python source produced by the LLM
    poll_interval: float = 1.0,    # pause (seconds) between process() calls
    description: str = "",         # human-readable purpose of the agent
    input_schema: dict = None,     # fields expected in a task payload
    output_schema: dict = None,    # fields present in a returned result
    llm_provider=None,             # optional provider stored for LLM calls
    **kwargs,
):
    """Store the generated source and initialise runtime bookkeeping.

    Remaining keyword arguments are forwarded unchanged to the base-class
    initialiser.  Nothing is compiled here; the ``_fn_*`` slots stay ``None``
    until the code has been compiled.
    """
    super().__init__(**kwargs)

    # Constructor arguments, kept as given (schemas default to empty dicts).
    self._code = code
    self.poll_interval = poll_interval
    self.description = description
    self.input_schema = input_schema if input_schema else {}
    self.output_schema = output_schema if output_schema else {}
    self._llm_provider = llm_provider

    # Entry points extracted from the generated code once it is compiled.
    self._fn_setup = self._fn_process = self._fn_handle_task = None

    # One namespace reused by every call, so generated code can keep state.
    self._ns: dict = {}

    # Running LLM usage totals.
    self.total_input_tokens = self.total_output_tokens = 0
    self.total_cost_usd = 0.0

    # Failure bookkeeping used to classify agent health.
    self._consecutive_errors: int = 0
    self._error_threshold: int = 3      # counted as degraded from here on
    self._last_error_time: float = 0.0
    self._error_phase: str = ""         # compile | setup | process | handle_task

    # Facade handed to generated code as its `agent` argument.
    self._api = _AgentAPI(self)
async def on_start(self):
    """Compile the stored code, then launch setup()/process() and announce the agent.

    When compilation reports an error, the LLM is asked for a corrected
    version up to ``_MAX_COMPILE_RETRIES`` times.  If the error persists, a
    fatal "compile" error is published and nothing is started.
    """
    source = self._code
    problem = self._compile_code(source)

    if problem:
        for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
            logger.warning(
                f"[{self.name}] Compile error (attempt {attempt}): {problem}"
            )
            candidate = await self._fix_syntax_with_llm(source, problem)
            if candidate is None:
                # No answer from the LLM — further attempts cannot help.
                break

            self._ns = {}  # start the retry from an empty namespace
            retry_problem = self._compile_code(candidate)
            if retry_problem is not None:
                # Still failing: hand the newest code and error to the next round.
                source, problem = candidate, retry_problem
                continue

            # The candidate compiled; keep it so later restarts reuse the good code.
            self._code = candidate
            problem = None
            logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
            await self._mqtt_publish(
                f"agents/{self.actor_id}/logs",
                {
                    "type": "log",
                    "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
                    "timestamp": time.time(),
                },
            )
            break

    if problem:
        # Out of attempts (or LLM unreachable): report a fatal error and bail out.
        failure = SyntaxError(problem)
        logger.error(f"[{self.name}] Code compilation failed permanently: {problem}")
        await self._publish_error(
            phase="compile", error=failure, traceback_str=problem, fatal=True
        )
        return

    # setup() runs as a background task so a long-running setup does not block
    # on_start(); without a setup(), the process() loop is started directly.
    if self._fn_setup:
        launcher = self._run_setup
    elif self._fn_process:
        launcher = self._process_loop
    else:
        launcher = None
    if launcher is not None:
        self._tasks.append(asyncio.create_task(launcher()))

    # Publish the manifest right away, even for agents that never publish themselves.
    await self._api._publish_manifest()
Called when actor starts. Override for init logic.
async def on_stop(self):
    """Give the generated code a chance to clean up before the actor stops.

    Looks up an optional ``cleanup`` callable in the shared namespace and
    calls it with the agent API.  Both ``async def cleanup(agent)`` and a
    plain ``def cleanup(agent)`` are accepted.  Cleanup stays best-effort:
    a failure is logged and never propagated, so it cannot block shutdown.
    """
    cleanup = self._ns.get("cleanup")
    if not cleanup:
        return

    import inspect

    try:
        outcome = cleanup(self._api)
        # Only await when the hook actually returned an awaitable; awaiting
        # the None returned by a synchronous hook would raise TypeError.
        if inspect.isawaitable(outcome):
            await outcome
    except Exception as exc:
        logger.warning(f"[{self.name}] cleanup() failed during stop: {exc}")
Called when actor stops. Override for cleanup.
async def handle_message(self, msg: Message):
    """Run a TASK message through the generated handle_task() and reply to the sender.

    Other message types are ignored.  A non-None result is sent back as a
    RESULT; an exception is published as a "handle_task" error and reported
    to the sender as an error RESULT.
    """
    if msg.type != MessageType.TASK:
        return

    self.metrics.messages_processed += 1
    task_fn = self._fn_handle_task

    if not task_fn:
        # The generated code defines no task handler — tell the sender so.
        if msg.sender_id:
            await self.send(
                msg.sender_id,
                MessageType.RESULT,
                {"info": f"{self.name} has no handle_task defined"},
            )
        return

    try:
        result = await task_fn(self._api, msg.payload or {})
        if msg.sender_id and result is not None:
            await self.send(msg.sender_id, MessageType.RESULT, result)
    except Exception as e:
        tb = traceback.format_exc()
        logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
        await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
        if msg.sender_id:
            failure = {
                "error": str(e),
                "error_phase": "handle_task",
                "agent": self.name,
            }
            await self.send(msg.sender_id, MessageType.RESULT, failure)
Handle messages not caught by default handlers.
class InstallerAgent(Actor):
    """
    Pre-defined agent that installs Python packages on demand.

    Local installs run pip through sys.executable so packages are installed
    into the interpreter (and venv) running this process.  The node_* actions
    work over SSH and need the optional ``asyncssh`` package.

    A TASK payload is a dict whose ``action`` key selects the operation:
    ``install`` (default), ``check``, ``resolve``, ``history``,
    ``node_install``, ``node_deploy`` or ``node_run``.
    """

    def __init__(self, **kwargs):
        kwargs.setdefault("name", "installer")
        super().__init__(**kwargs)
        self.protected = True
        # One entry per pip attempt: package, success, timestamp, output tail.
        self._install_log: list[dict] = []
        # Strong references to fire-and-forget log tasks (see _log_remote).
        self._log_tasks: set = set()

    def _current_task_description(self) -> str:
        return "idle"

    async def on_start(self):
        """Log readiness and publish this agent's manifest."""
        logger.info(f"[{self.name}] Installer ready — using: {sys.executable}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": f"Installer ready ({sys.executable})", "timestamp": time.time()},
        )
        await self.publish_manifest(
            description="Installs Python packages on demand via pip",
            capabilities=["pip_install", "package_management"],
        )

    async def handle_message(self, msg: Message):
        """Execute a TASK message and send the result to reply_to (or the sender)."""
        if msg.type == MessageType.TASK:
            result = await self._handle_install(msg)
            # Echo task_id back so caller's future can resolve
            if isinstance(msg.payload, dict):
                task_id = msg.payload.get("task") or msg.payload.get("_task_id")
                if task_id:
                    result["task"] = task_id
                    result["_task_id"] = task_id
            target = msg.reply_to or msg.sender_id
            if target:
                await self.send(target, MessageType.RESULT, result)

    @staticmethod
    def _normalize_packages(packages):
        """Return *packages* as a list; a string is split on commas and whitespace."""
        if isinstance(packages, str):
            return packages.replace(",", " ").split()
        return packages

    async def _handle_install(self, msg: Message) -> dict:
        """Dispatch on payload["action"] and return the action's result dict."""
        payload = msg.payload if isinstance(msg.payload, dict) else {}
        action = payload.get("action", "install")

        if action == "install":
            packages = self._normalize_packages(payload.get("packages", []))
            return await self._install_packages(packages)

        if action == "check":
            packages = self._normalize_packages(payload.get("packages", []))
            return self._check_packages(packages)

        if action == "resolve":
            return self._resolve_imports(payload.get("imports", []))

        if action == "history":
            return {"history": self._install_log[-20:]}

        if action == "node_install":
            # Install packages on a remote node via SSH
            # payload: {host, user, packages, password (opt), key_path (opt)}
            return await self._node_install(payload)

        if action == "node_deploy":
            # Full bootstrap: copy remote_runner.py + install deps + start runner
            # payload: {host, user, node_name, broker, password (opt), key_path (opt)}
            return await self._node_deploy(payload)

        if action == "node_run":
            # Run an arbitrary command on a remote node via SSH
            # payload: {host, user, command, password (opt), key_path (opt)}
            return await self._node_run(payload)

        return {"error": f"Unknown action: {action}"}

    # ── Core install logic ──────────────────────────────────────────────────

    async def _install_packages(self, packages: list[str]) -> dict:
        """Install each package that is not yet importable.

        Returns a dict with per-package ``results``, the ``failed`` list, an
        overall ``success`` flag and a summary ``message``; or ``{"error": ...}``
        when *packages* is empty.
        """
        if not packages:
            return {"error": "No packages specified"}

        results = {}
        failed = []

        for pkg in packages:
            pkg = pkg.strip()
            if not pkg:
                continue

            # Resolve import name → pip name (e.g. "cv2" → "opencv-python")
            pip_name = IMPORT_TO_PACKAGE.get(pkg, pkg)

            # Check if already importable (_is_installed refreshes the import cache)
            import_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)
            if self._is_installed(import_name):
                logger.info(f"[{self.name}] {pip_name} already installed.")
                results[pip_name] = "already_installed"
                continue

            logger.info(f"[{self.name}] Installing {pip_name} into {sys.executable}...")
            await self._mqtt_publish(
                f"agents/{self.actor_id}/logs",
                {"type": "log", "message": f"Installing {pip_name}...", "timestamp": time.time()},
            )

            success, output = await self._pip_install(pip_name)

            # duckduckgo-search was renamed to ddgs in v9 — try the other name as fallback
            if not success and pip_name in ("duckduckgo-search", "ddgs"):
                alt = "ddgs" if pip_name == "duckduckgo-search" else "duckduckgo-search"
                logger.info(f"[{self.name}] Trying alternative name: {alt}")
                success, output = await self._pip_install(alt)
                if success:
                    pip_name = alt

            # pdfplumber sometimes fails on Windows — try pymupdf (fitz) as fallback
            if not success and pip_name == "pdfplumber":
                logger.info(f"[{self.name}] pdfplumber failed, trying pymupdf as fallback...")
                success, output = await self._pip_install("pymupdf")
                if success:
                    pip_name = "pymupdf"

            results[pip_name] = "installed" if success else f"failed: {output[-300:]}"
            if not success:
                failed.append(pip_name)

            self._install_log.append({
                "package": pip_name,
                "success": success,
                "timestamp": time.time(),
                "output": output[-500:],
            })

            if success:
                status = f"✓ {pip_name} installed"
            else:
                # Show the actual pip error so failures are diagnosable
                err_snippet = output[-400:].strip().replace("\n", " | ")
                status = f"✗ {pip_name} FAILED: {err_snippet}"
            logger.info(f"[{self.name}] {status}")
            await self._mqtt_publish(
                f"agents/{self.actor_id}/logs",
                {"type": "log", "message": status, "timestamp": time.time()},
            )

        return {
            "results": results,
            "failed": failed,
            "success": len(failed) == 0,
            "message": f"Installed {len(results) - len(failed)}/{len(results)} packages",
        }

    async def _pip_install(self, package: str) -> tuple[bool, str]:
        """Run pip install using the same interpreter that launched this process.

        sys.executable inside a venv points to venv/Scripts/python.exe (Windows)
        or venv/bin/python (Linux/Mac), so packages always land in the right place.

        Uses subprocess.run() in a thread executor instead of asyncio.create_subprocess_exec()
        because asyncio subprocesses are unreliable on Windows with SelectorEventLoop
        (the default in some Python versions / environments). subprocess.run() works
        correctly on all platforms.

        Returns (success, combined stdout+stderr text or an error description).
        """
        import subprocess

        cmd = [sys.executable, "-m", "pip", "install", package, "--quiet"]
        if sys.platform != "win32":
            cmd.append("--break-system-packages")

        def _run_pip() -> tuple[bool, str]:
            try:
                result = subprocess.run(
                    cmd,
                    stdout=subprocess.PIPE,
                    stderr=subprocess.PIPE,
                    timeout=180,
                )
                output = (result.stdout + result.stderr).decode("utf-8", errors="replace")
                return result.returncode == 0, output
            except subprocess.TimeoutExpired:
                return False, "pip timed out after 180s"
            except FileNotFoundError:
                return False, f"Python executable not found: {sys.executable}"
            except Exception as e:
                return False, f"{type(e).__name__}: {e}"

        try:
            # We are inside a coroutine, so the running loop is the one to use.
            loop = asyncio.get_running_loop()
            success, output = await loop.run_in_executor(None, _run_pip)

            if success:
                # Refresh import machinery so the new package is visible immediately
                importlib.invalidate_caches()

            return success, output

        except Exception as e:
            return False, f"Executor error: {type(e).__name__}: {e}"

    def _is_installed(self, import_name: str) -> bool:
        """Check importability, always refreshing the import cache first."""
        importlib.invalidate_caches()
        try:
            importlib.import_module(import_name)
            return True
        except ImportError:
            return False

    # ── Helper actions ──────────────────────────────────────────────────────

    def _check_packages(self, packages: list[str]) -> dict:
        """Report "installed" or "missing" for each requested name."""
        status = {}
        for pkg in packages:
            pip_name = IMPORT_TO_PACKAGE.get(pkg, pkg)
            import_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)
            status[pkg] = "installed" if self._is_installed(import_name) else "missing"
        return {"status": status}

    def _resolve_imports(self, imports: list[str]) -> dict:
        """Map import names to pip package names (unknown names map to themselves)."""
        return {"resolved": {imp: IMPORT_TO_PACKAGE.get(imp, imp) for imp in imports}}

    # ── Remote node helpers (SSH via asyncssh) ──────────────────────────────

    def _ssh_kwargs(self, payload: dict) -> dict:
        """Build asyncssh connection kwargs from a task payload."""
        kwargs = dict(
            host=payload["host"],
            username=payload.get("user", "pi"),
            known_hosts=None,  # disable host key checking for LAN deploys
        )
        if payload.get("password"):
            kwargs["password"] = payload["password"]
        if payload.get("key_path"):
            kwargs["client_keys"] = [payload["key_path"]]
        return kwargs

    async def _ssh_run(self, conn, command: str) -> tuple[bool, str]:
        """Run a single command over an open SSH connection. Returns (ok, output)."""
        result = await conn.run(command, check=False)
        output = (result.stdout or "") + (result.stderr or "")
        return result.exit_status == 0, output.strip()

    def _log_remote(self, message: str):
        """Log *message* locally and publish it to this agent's log topic in the background."""
        logger.info(f"[{self.name}] {message}")
        task = asyncio.create_task(self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": message, "timestamp": time.time()},
        ))
        # The event loop only keeps weak references to tasks; hold one here
        # until the publish finishes so it cannot be garbage-collected early.
        self._log_tasks.add(task)
        task.add_done_callback(self._log_tasks.discard)

    async def _node_install(self, payload: dict) -> dict:
        """
        Install pip packages on a remote node via SSH.

        payload keys:
          host      — IP or hostname of the remote machine
          user      — SSH username (default: "pi")
          packages  — list of package names to install
          password  — SSH password (optional, prefer key auth)
          key_path  — path to SSH private key (optional)
        """
        try:
            import asyncssh
        except ImportError:
            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
        import shlex

        host = payload.get("host")
        packages = self._normalize_packages(payload.get("packages", []))
        if not host:
            return {"error": "Missing 'host' in payload"}
        if not packages:
            return {"error": "No packages specified"}

        pkg_str = " ".join(packages)
        # Quote every requirement for the remote shell: payload text must not be
        # able to inject commands, and specs such as "numpy>=1.0" would otherwise
        # be parsed as an output redirection.
        quoted_pkgs = " ".join(shlex.quote(p) for p in packages)
        self._log_remote(f"Installing {pkg_str} on {host}...")

        try:
            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
                ok, output = await self._ssh_run(
                    conn,
                    f"pip install {quoted_pkgs} --break-system-packages -q 2>&1"
                )
                if ok:
                    self._log_remote(f"✓ {pkg_str} installed on {host}")
                    return {"success": True, "host": host, "packages": packages, "output": output[-300:]}
                else:
                    self._log_remote(f"✗ Install failed on {host}: {output[-200:]}")
                    return {"success": False, "host": host, "error": output[-400:]}

        except Exception as e:
            return {"success": False, "host": host, "error": str(e)}

    async def _node_deploy(self, payload: dict) -> dict:
        """
        Full bootstrap of a new Wactorz edge node via SSH.

        Steps:
          1. Create ~/wactorz/ directory
          2. Upload remote_runner.py
          3. Install aiomqtt (the only runtime dependency)
          4. Kill any existing runner with the same node name
          5. Start the runner in the background

        The method returns once the runner has been launched; it does not
        itself wait for or verify that the node comes online.

        payload keys:
          host        — IP or hostname
          user        — SSH username (default: "pi")
          node_name   — name this node will use (default: "remote-node")
          broker      — MQTT broker host reachable FROM the Pi (default: "localhost")
          password    — SSH password (optional)
          key_path    — path to SSH private key (optional)
          port        — MQTT broker port (default: 1883)
        """
        try:
            import asyncssh
        except ImportError:
            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
        import shlex

        host = payload.get("host")
        user = payload.get("user", "pi")
        node_name = payload.get("node_name", "remote-node")
        broker = payload.get("broker", "localhost")
        mqtt_port = payload.get("port", 1883)

        if not host:
            return {"error": "Missing 'host' in payload"}

        # Find remote_runner.py relative to this file
        import pathlib
        candidates = [
            pathlib.Path(__file__).parent.parent / "remote_runner.py",
            pathlib.Path("remote_runner.py"),
            pathlib.Path(__file__).parent.parent.parent / "remote_runner.py",
        ]
        runner_path = next((p for p in candidates if p.exists()), None)
        if not runner_path:
            return {"error": "remote_runner.py not found. Make sure it is in the wactorz root."}

        self._log_remote(f"Deploying node '{node_name}' to {user}@{host}...")

        # Shell-safe renderings of the payload values used in remote commands.
        q_name = shlex.quote(str(node_name))
        q_broker = shlex.quote(str(broker))
        q_port = shlex.quote(str(mqtt_port))
        q_pattern = shlex.quote(f"remote_runner.py.*--name {node_name}")
        q_logfile = shlex.quote(f"{node_name}.log")

        try:
            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:

                # 1. Create directory
                await self._ssh_run(conn, "mkdir -p ~/wactorz")
                self._log_remote(f"[{node_name}] Directory created.")

                # 2. Upload remote_runner.py
                # NOTE(review): step 1 uses ~ while this path assumes the home
                # directory is /home/<user> — confirm for users such as root.
                async with conn.start_sftp_client() as sftp:
                    await sftp.put(str(runner_path), f"/home/{user}/wactorz/remote_runner.py")
                self._log_remote(f"[{node_name}] remote_runner.py uploaded.")

                # 3. Install the only required dependency
                ok, out = await self._ssh_run(
                    conn, "pip install aiomqtt --break-system-packages -q 2>&1"
                )
                if not ok:
                    self._log_remote(f"[{node_name}] pip install warning: {out[:150]}")
                else:
                    self._log_remote(f"[{node_name}] aiomqtt installed.")

                # 4. Kill any existing instance with this node name
                await self._ssh_run(
                    conn,
                    f"pkill -f {q_pattern} 2>/dev/null; true"
                )

                # 5. Start runner in the background
                cmd = (
                    f"nohup python3 ~/wactorz/remote_runner.py "
                    f"--broker {q_broker} --port {q_port} --name {q_name} "
                    f"> ~/wactorz/{q_logfile} 2>&1 &"
                )
                await self._ssh_run(conn, cmd)
                self._log_remote(f"[{node_name}] Runner started.")

            self._log_remote(
                f"[{node_name}] Deploy complete! Node will appear in /nodes within 15s."
            )
            return {
                "success": True,
                "node_name": node_name,
                "host": host,
                "broker": broker,
                "message": (
                    f"Node '{node_name}' deployed to {user}@{host}. "
                    f"It will appear in /nodes within ~15 seconds."
                ),
            }

        except Exception as e:
            msg = f"Deploy failed for '{node_name}' on {host}: {e}"
            self._log_remote(msg)
            return {"success": False, "node_name": node_name, "host": host, "error": str(e)}

    async def _node_run(self, payload: dict) -> dict:
        """
        Run an arbitrary shell command on a remote node via SSH.

        The command is passed to the remote shell exactly as given; running
        caller-supplied shell text is the purpose of this action.

        payload keys:
          host      — IP or hostname
          user      — SSH username (default: "pi")
          command   — shell command to run
          password / key_path — auth (optional)
        """
        try:
            import asyncssh
        except ImportError:
            return {"error": "asyncssh not installed. Run: pip install asyncssh"}

        host = payload.get("host")
        command = payload.get("command", "echo hello")
        if not host:
            return {"error": "Missing 'host' in payload"}

        self._log_remote(f"Running on {host}: {command[:80]}")
        try:
            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
                ok, output = await self._ssh_run(conn, command)
                return {
                    "success": ok,
                    "host": host,
                    "command": command,
                    "output": output,
                    # _ssh_run only reports success/failure, so this is 0 or 1,
                    # not the remote command's actual exit status.
                    "exit_code": 0 if ok else 1,
                }
        except Exception as e:
            return {"success": False, "host": host, "error": str(e)}
Pre-defined agent that installs Python packages on demand. Uses sys.executable so packages are installed into the active venv.
async def on_start(self):
    """Announce that the installer is ready, then publish its manifest."""
    interpreter = sys.executable
    logger.info(f"[{self.name}] Installer ready — using: {interpreter}")

    # Mirror the readiness notice onto this agent's log topic.
    ready_event = {
        "type": "log",
        "message": f"Installer ready ({interpreter})",
        "timestamp": time.time(),
    }
    await self._mqtt_publish(f"agents/{self.actor_id}/logs", ready_event)

    await self.publish_manifest(
        capabilities=["pip_install", "package_management"],
        description="Installs Python packages on demand via pip",
    )
Called when actor starts. Override for init logic.
async def handle_message(self, msg: Message):
    """Process a TASK message and send its result to reply_to, else to the sender."""
    if msg.type != MessageType.TASK:
        return

    result = await self._handle_install(msg)

    # Copy the caller's task id into the result so its pending future can resolve.
    request = msg.payload
    if isinstance(request, dict):
        task_id = request.get("task") or request.get("_task_id")
        if task_id:
            for key in ("task", "_task_id"):
                result[key] = task_id

    target = msg.reply_to or msg.sender_id
    if target:
        await self.send(target, MessageType.RESULT, result)
Handle messages not caught by default handlers.
class CatalogAgent(Actor):
    """
    Pre-built agent recipe library.

    Answers TASK messages with one of three actions:
      list   — names, descriptions and capabilities of all recipes
      info   — one recipe without its code
      spawn  — start the recipe as a DynamicAgent (installing its declared
               dependencies through the "installer" agent first)
    """

    def __init__(self, **kwargs):
        kwargs.setdefault("name", "catalog")
        super().__init__(**kwargs)
        self.protected = True
        self._catalog = _build_catalog()

    # ── Lifecycle ──────────────────────────────────────────────────────────────

    async def on_start(self):
        """Publish the catalog manifest and inject one manifest per recipe into main."""
        names = list(self._catalog.keys())
        logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log",
             "message": f"Catalog ready: {', '.join(names)}",
             "timestamp": time.time()},
        )

        # Publish one manifest for the catalog agent itself
        await self.publish_manifest(
            description=(
                "Pre-built agent recipe library. "
                "Spawns ready-made agents by name without requiring code. "
                f"Available: {', '.join(names)}"
            ),
            capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
            input_schema={"action": "str — 'spawn' | 'list' | 'info'",
                          "agent": "str — agent name for spawn/info actions"},
            output_schema={"ok": "bool", "message": "str",
                           "agents": "list", "recipe": "dict"},
        )

        # Inject recipe manifests directly into main's _agent_manifests dict.
        # Retry briefly since catalog and main start concurrently.

        # Wait for main to be ready (up to 10s)
        main = None
        for _ in range(20):
            main = self._registry.find_by_name("main") if self._registry else None
            if main and hasattr(main, "_agent_manifests"):
                break
            await asyncio.sleep(0.5)

        # Nothing below awaits, so readiness cannot change while injecting.
        main_ready = bool(main and hasattr(main, "_agent_manifests"))

        for name, recipe in self._catalog.items():
            manifest = {
                "name": name,
                "actor_id": f"catalog.{name}",
                "description": recipe.get("description", ""),
                "capabilities": recipe.get("capabilities", []),
                "input_schema": recipe.get("input_schema", {}),
                "output_schema": recipe.get("output_schema", {}),
                "publishes": [],
                "spawnable": True,
                "catalog": self.name,
                "timestamp": time.time(),
            }

            if main_ready:
                main._agent_manifests[name] = manifest
                logger.info(f"[{self.name}] Injected manifest for '{name}' into main")
            else:
                logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{name}'")

    def _current_task_description(self) -> str:
        return f"catalog ({len(self._catalog)} recipes)"

    # ── Message handling ───────────────────────────────────────────────────────

    async def handle_message(self, msg: Message):
        """Handle a TASK message and send the result to reply_to (or the sender)."""
        if msg.type != MessageType.TASK:
            return

        payload = msg.payload if msg.payload is not None else {}
        result = await self._handle(payload)

        # Echo task_id so caller futures resolve
        task_id = None
        if isinstance(payload, dict):
            task_id = payload.get("task") or payload.get("_task_id")
        if task_id:
            result["task"] = task_id
            result["_task_id"] = task_id

        target = msg.reply_to or msg.sender_id
        if target:
            await self.send(target, MessageType.RESULT, result)

    async def _handle(self, payload) -> dict:
        """Interpret *payload* and run the matching action.

        Payloads arrive in three forms:
          "spawn doc-to-pptx-agent"                 ← raw string
          {"text": "spawn doc-to-pptx-agent"}       ← delegate_task() wrapping
          {"action": "spawn", "agent": "..."}       ← structured dict
        Anything that cannot be parsed falls back to the list action.
        """
        # ── Structured dict with explicit action key ───────────────────────
        if isinstance(payload, dict) and payload.get("action"):
            # str() keeps a non-string action from crashing on .lower();
            # it is then reported as an unknown action instead.
            action = str(payload["action"]).lower().strip()
            if action == "list":
                return self._action_list()
            if action == "info":
                return self._action_info(payload.get("agent", ""))
            if action == "spawn":
                return await self._action_spawn(payload.get("agent", ""), payload)
            return {"ok": False, "message": f"Unknown action '{action}'. Use: spawn | list | info"}

        # ── Convenience dict shortcuts ─────────────────────────────────────
        if isinstance(payload, dict) and "spawn" in payload and isinstance(payload["spawn"], str):
            return await self._action_spawn(payload["spawn"], payload)

        # ── Extract text from any remaining form ───────────────────────────
        if isinstance(payload, str):
            text = payload.strip()
        elif isinstance(payload, dict):
            text = (payload.get("text") or payload.get("message") or payload.get("query") or "").strip()
        else:
            text = ""

        # ── Parse "verb agent-name" ────────────────────────────────────────
        if text:
            parts = text.split(None, 1)
            cmd = parts[0].lower()
            arg = parts[1].strip() if len(parts) > 1 else ""
            if cmd == "list":
                return self._action_list()
            if cmd == "info":
                return self._action_info(arg)
            if cmd == "spawn":
                return await self._action_spawn(arg, {})
            # Bare agent name with no verb → treat as spawn
            if cmd in self._catalog:
                return await self._action_spawn(cmd, {})

        # ── Nothing parseable → helpful default ───────────────────────────
        return self._action_list()

    # ── Actions ────────────────────────────────────────────────────────────────

    def _action_list(self) -> dict:
        """Return name, description and capabilities of every recipe."""
        agents = [
            {
                "name": name,
                "description": recipe.get("description", ""),
                "capabilities": recipe.get("capabilities", []),
            }
            for name, recipe in self._catalog.items()
        ]
        return {
            "ok": True,
            "message": f"{len(agents)} agent(s) available in catalog",
            "agents": agents,
        }

    def _action_info(self, name: str) -> dict:
        """Return the recipe for *name* without its code, or an error dict."""
        if not name:
            return {"ok": False, "message": "Provide 'agent' name for info action"}
        recipe = self._catalog.get(name)
        if not recipe:
            available = list(self._catalog.keys())
            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}
        # Return recipe without the full code string (too large for a response)
        safe = {k: v for k, v in recipe.items() if k != "code"}
        return {"ok": True, "message": f"Recipe for '{name}'", "recipe": safe}

    async def _install_dependencies(self, name: str, packages) -> None:
        """Ask the installer agent to install *packages* and wait up to 120 s.

        The wait is best-effort: on timeout a warning is logged and the caller
        proceeds.  A missing installer agent only produces a warning.
        """
        installer = self._registry.find_by_name("installer") if self._registry else None
        if not installer:
            logger.warning(f"[{self.name}] installer agent not found — skipping dep install for '{name}'")
            return

        logger.info(f"[{self.name}] Installing deps for '{name}': {packages}")
        import uuid
        task_id = f"cat_install_{uuid.uuid4().hex[:8]}"
        future = asyncio.get_running_loop().create_future()
        # Use main's result futures since installer replies there
        main = self._registry.find_by_name("main") if self._registry else None
        if main:
            main._result_futures[task_id] = future
        try:
            await self.send(installer.actor_id, MessageType.TASK, {
                "action": "install",
                "packages": packages,
                "task": task_id,
                "_task_id": task_id,
            })
            try:
                await asyncio.wait_for(future, timeout=120.0)
            except asyncio.TimeoutError:
                logger.warning(f"[{self.name}] Install timeout for '{name}' — proceeding anyway")
        finally:
            # Do not leave our future behind in main's table (e.g. after a timeout).
            if main:
                main._result_futures.pop(task_id, None)

    async def _action_spawn(self, name: str, payload: dict) -> dict:
        """Spawn the recipe *name* as a DynamicAgent.

        Returns ``{"ok": True, ...}`` when the agent was started or is already
        running, otherwise ``{"ok": False, "message": ...}``.  Any exception
        raised while installing dependencies or spawning is reported in the
        returned message rather than propagated.
        """
        if not name:
            return {"ok": False, "message": "Provide 'agent' name to spawn"}

        recipe = self._catalog.get(name)
        if not recipe:
            available = list(self._catalog.keys())
            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}

        if not self._registry:
            return {"ok": False, "message": "No registry available — cannot spawn"}

        # If already running, return success immediately
        existing = self._registry.find_by_name(name)
        if existing:
            return {"ok": True, "message": f"'{name}' is already running"}

        logger.info(f"[{self.name}] Spawning '{name}'...")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": f"Spawning '{name}'...", "timestamp": time.time()},
        )

        try:
            from pathlib import Path

            from .dynamic_agent import DynamicAgent

            # ── Auto-install Python dependencies ───────────────────────────
            install = recipe.get("install", [])
            if install:
                await self._install_dependencies(name, install)

            # Find main to get its llm_provider and persistence_dir
            main = self._registry.find_by_name("main")
            llm_provider = getattr(main, "llm", None) if main else None
            if main:
                # Path() also covers the string fallback, which has no .parent.
                persistence_dir = str(Path(getattr(main, "_persistence_dir", "./state/main")).parent)
            else:
                persistence_dir = "./state"

            actor = await self.spawn(
                DynamicAgent,
                name=name,
                code=recipe["code"],
                poll_interval=float(recipe.get("poll_interval", 3600)),
                description=recipe.get("description", ""),
                input_schema=recipe.get("input_schema", {}),
                output_schema=recipe.get("output_schema", {}),
                llm_provider=llm_provider,
                persistence_dir=persistence_dir,
            )

            if not actor:
                return {"ok": False, "message": f"Spawn returned no actor for '{name}'"}

            # Save to main's spawn registry so it survives restarts
            if main and hasattr(main, "_save_to_spawn_registry"):
                main._save_to_spawn_registry(recipe)

            msg = f"'{name}' spawned and running"
            logger.info(f"[{self.name}] {msg}")
            await self._mqtt_publish(
                f"agents/{self.actor_id}/logs",
                {"type": "log", "message": msg, "timestamp": time.time()},
            )
            return {"ok": True, "message": msg, "agent": name}

        except Exception as e:
            msg = f"Failed to spawn '{name}': {e}"
            logger.error(f"[{self.name}] {msg}")
            return {"ok": False, "message": msg}

    # ── Public API for other agents ────────────────────────────────────────────

    def list_recipes(self) -> list[str]:
        """Return names of all available recipes."""
        return list(self._catalog.keys())

    def get_recipe(self, name: str) -> Optional[dict]:
        """Return full recipe dict (including code) or None."""
        return self._catalog.get(name)
Pre-built agent recipe library. Spawns any catalog agent on request by delegating to main's spawn pipeline.
async def on_start(self):
    """Announce the catalog and hand every recipe manifest to the main actor.

    In order, this:
      1. logs the recipe names and publishes them to this actor's MQTT log
         topic;
      2. publishes one manifest describing the catalog agent itself;
      3. polls the registry (20 attempts, 0.5 s apart) for an actor named
         "main" that exposes ``_agent_manifests``;
      4. writes one manifest per recipe into ``main._agent_manifests``, or
         logs a warning per recipe when main never became available.
    """
    names = list(self._catalog.keys())
    joined = ", ".join(names)
    logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
    await self._mqtt_publish(
        f"agents/{self.actor_id}/logs",
        {"type": "log",
         "message": f"Catalog ready: {joined}",
         "timestamp": time.time()},
    )

    # Publish one manifest for the catalog agent itself
    await self.publish_manifest(
        description=(
            "Pre-built agent recipe library. "
            "Spawns ready-made agents by name without requiring code. "
            f"Available: {joined}"
        ),
        capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
        input_schema={"action": "str — 'spawn' | 'list' | 'info'",
                      "agent": "str — agent name for spawn/info actions"},
        output_schema={"ok": "bool", "message": "str",
                       "agents": "list", "recipe": "dict"},
    )

    # Inject recipe manifests directly into main's _agent_manifests dict.
    # Retry briefly since catalog and main start concurrently.
    # Wait for main to be ready (up to 10s)
    main = None
    main_ready = False
    for _ in range(20):
        main = self._registry.find_by_name("main") if self._registry else None
        if main and hasattr(main, "_agent_manifests"):
            main_ready = True
            break
        await asyncio.sleep(0.5)

    if not main_ready:
        # Nothing to deliver to: report each recipe that could not be injected.
        for name in self._catalog:
            logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{name}'")
        return

    for name, recipe in self._catalog.items():
        main._agent_manifests[name] = {
            "name": name,
            "actor_id": f"catalog.{name}",
            "description": recipe.get("description", ""),
            "capabilities": recipe.get("capabilities", []),
            "input_schema": recipe.get("input_schema", {}),
            "output_schema": recipe.get("output_schema", {}),
            "publishes": [],
            "spawnable": True,
            "catalog": self.name,
            "timestamp": time.time(),
        }
        logger.info(f"[{self.name}] Injected manifest for '{name}' into main")
Called when the actor starts. Override to add initialization logic.
async def handle_message(self, msg: Message):
    """Run a TASK message through ``self._handle`` and reply with a RESULT.

    Messages of any other type are ignored. The reply goes to
    ``msg.reply_to`` when set, otherwise to ``msg.sender_id``; if neither
    is set, no reply is sent. When the payload is a dict carrying a
    ``"task"`` or ``"_task_id"`` value, that value is copied into the
    result under both keys.
    """
    if msg.type == MessageType.TASK:
        payload = {} if msg.payload is None else msg.payload
        result = await self._handle(payload)

        # Echo task_id so caller futures resolve
        if isinstance(payload, dict):
            task_id = payload.get("task") or payload.get("_task_id")
        else:
            task_id = None
        if task_id:
            for key in ("task", "_task_id"):
                result[key] = task_id

        recipient = msg.reply_to or msg.sender_id
        if recipient:
            await self.send(recipient, MessageType.RESULT, result)
Handle messages not caught by default handlers.
def HomeAssistantHardwareAgent(*_hw_args, **_hw_kwargs):  # type: ignore[no-redef]
    """Deprecated factory that builds a ``HomeAssistantAgent`` instead.

    Emits a ``DeprecationWarning`` attributed to the caller (``stacklevel=2``),
    fills in ``name="home-assistant-agent"`` when the caller gave no ``name``,
    and forwards all positional and keyword arguments unchanged.

    Returns:
        The object produced by calling ``HomeAssistantAgent`` with the
        forwarded arguments.
    """
    deprecation_text = (
        "HomeAssistantHardwareAgent is deprecated and will be removed in a future release. "
        "Use HomeAssistantAgent instead."
    )
    _warnings_hw.warn(deprecation_text, DeprecationWarning, stacklevel=2)

    if "name" not in _hw_kwargs:
        _hw_kwargs["name"] = "home-assistant-agent"

    # Imported lazily, at call time, exactly as the original did.
    from .home_assistant_agent import HomeAssistantAgent as _replacement_cls  # noqa: PLC0415
    return _replacement_cls(*_hw_args, **_hw_kwargs)