wactorz

Wactorz - Actor-Model Multi-Agent Framework

 1"""Wactorz - Actor-Model Multi-Agent Framework"""
 2from ._version import __version__
 3from .core.actor import Actor, ActorState, Message, MessageType
 4from .core.registry import ActorSystem, ActorRegistry
 5__all__ = [
 6    "__version__",
 7    "Actor", "ActorState", "Message", "MessageType",
 8    "ActorSystem", "ActorRegistry",
 9]
10# Optional agents — only exported when their dependencies are available.
11try:
12    from .agents.llm_agent import LLMAgent, AnthropicProvider, OpenAIProvider, OllamaProvider, NIMProvider
13    __all__ += ["LLMAgent", "AnthropicProvider", "OpenAIProvider", "OllamaProvider", "NIMProvider"]
14except ImportError:
15    pass
16try:
17    from .agents.main_actor import MainActor
18    from .agents.monitor_agent import MonitorActor
19    from .agents.manual_agent import ManualAgent
20    from .agents.planner_agent import PlannerAgent
21    from .agents.dynamic_agent import DynamicAgent
22    from .agents.installer_agent import InstallerAgent
23    from .agents.catalog_agent import CatalogAgent
24    __all__ += ["MainActor", "MonitorActor", "CodeAgent", "ManualAgent", "PlannerAgent",
25                "DynamicAgent", "InstallerAgent", "CatalogAgent"]
26except ImportError:
27    pass
28#try:
29#    from .agents.ml_agent import MLAgent, YOLOAgent, AnomalyDetectorAgent
30#    __all__ += ["MLAgent", "YOLOAgent", "AnomalyDetectorAgent"]
31#except ImportError:
32#    pass
33try:
34    from .agents.home_assistant_hardware_agent import HomeAssistantHardwareAgent
35    __all__ += ["HomeAssistantHardwareAgent"]
36except ImportError:
37    pass
__version__ = '0.2.0'
class Actor(abc.ABC):
class Actor(ABC):
    """
    Base Actor class. All agents inherit from this.
    Actors are fully async and communicate only through messages.

    Once started, an actor runs three background tasks: the mailbox message
    loop, the MQTT heartbeat loop and the MQTT command listener. Subclasses
    must implement ``handle_message``; ``on_start`` / ``on_stop`` are optional
    hooks.
    """

    def __init__(
        self,
        actor_id: Optional[str] = None,
        name: Optional[str] = None,
        persistence_dir: str = "./actor_state",
        mailbox_size: int = 1000,
    ):
        """
        Create an actor without starting it.

        Args:
            actor_id: Explicit id. When omitted, a deterministic UUID5 is
                derived from ``name``; with neither, a random UUID4 is used.
            name: Human-readable name; defaults to ``actor-<first 8 id chars>``.
            persistence_dir: Root directory. State lives in a per-actor
                sub-folder, which is created here.
            mailbox_size: Maximum number of queued inbound messages.
        """
        if actor_id:
            self.actor_id = actor_id
        elif name:
            # Deterministic UUID from name — same name always gets same ID across restarts
            self.actor_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"agentflow.actor.{name}"))
        else:
            self.actor_id = str(uuid.uuid4())
        self.name = name or f"actor-{self.actor_id[:8]}"
        self.state = ActorState.IDLE
        self.metrics = ActorMetrics()

        # Async mailbox (inbox)
        self._mailbox: asyncio.Queue = asyncio.Queue(maxsize=mailbox_size)
        self._outbox: dict[str, asyncio.Queue] = {}  # actor_id -> queue ref

        # Registry reference (set by ActorSystem)
        self._registry: Optional["ActorRegistry"] = None
        self._mqtt_client: Optional[Any] = None
        self._mqtt_broker: str = "localhost"
        self._mqtt_port: int = 1883

        # Persistence
        # Use name as persistence folder so it survives restarts with same name
        # Falls back to actor_id for anonymous actors
        safe_name = self.name.replace("/", "_").replace("\\", "_")
        self._persistence_dir = Path(persistence_dir) / safe_name
        self._persistence_dir.mkdir(parents=True, exist_ok=True)
        self._persistent_state: dict = {}

        # Protection — if True, stop/delete/pause commands are ignored
        self.protected: bool = False

        # Supervisor reference — set by Supervisor when this actor is registered under it
        self.supervisor_id: Optional[str] = None

        # Handlers
        self._handlers: dict[MessageType, Callable] = {}
        self._setup_default_handlers()

        # Background tasks
        self._tasks: list[asyncio.Task] = []

        logger.info(f"[{self.name}] Actor created with id={self.actor_id}")

    # ─── Lifecycle ────────────────────────────────────────────────────────────

    async def start(self):
        """Start the actor's event loop.

        Loads persisted state, runs the ``on_start`` hook, launches the three
        background tasks and publishes the initial status.
        """
        self.state = ActorState.RUNNING
        self.metrics.start_time = time.time()
        await self._load_persistent_state()
        await self.on_start()
        self._tasks.append(asyncio.create_task(self._message_loop()))
        self._tasks.append(asyncio.create_task(self._heartbeat_loop()))
        self._tasks.append(asyncio.create_task(self._command_listener()))
        await self._publish_status()
        logger.info(f"[{self.name}] Actor started.")

    async def stop(self):
        """Gracefully stop the actor.

        Marks the actor STOPPED, cancels its background tasks, runs the
        ``on_stop`` hook, saves persistent state and publishes the final status.
        """
        self.state = ActorState.STOPPED
        # stop() is also awaited from inside the actor's own tasks (the message
        # loop handling STOP, and the command listener handling stop/delete).
        # Cancelling the task that is executing this coroutine would raise
        # CancelledError at the next suspending await below and abort the
        # shutdown, so the current task is skipped. It terminates by itself:
        # the message loop re-checks the state and the listener returns.
        current = asyncio.current_task()
        for task in self._tasks:
            if task is not current:
                task.cancel()
        await self.on_stop()                  # hook runs first so it can still persist()
        await self._save_persistent_state()   # then flush state to disk
        await self._publish_status()
        logger.info(f"[{self.name}] Actor stopped.")

    async def pause(self):
        """Mark the actor PAUSED and publish the new status."""
        self.state = ActorState.PAUSED
        await self._publish_status()

    async def resume(self):
        """Mark the actor RUNNING again and publish the new status."""
        self.state = ActorState.RUNNING
        await self._publish_status()

    # ─── Message Loop ─────────────────────────────────────────────────────────

    async def _message_loop(self):
        """Main message processing loop; runs until the actor is STOPPED or FAILED."""
        # Only count meaningful messages — not heartbeats, status pings, lifecycle.
        # Built once: the set does not change between iterations.
        noise = {MessageType.HEARTBEAT, MessageType.STATUS_REQUEST,
                 MessageType.STATUS_RESPONSE, MessageType.STOP,
                 MessageType.PAUSE, MessageType.RESUME}
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                if self.state == ActorState.PAUSED:
                    await asyncio.sleep(0.1)
                    continue

                # The 1 s timeout lets the loop re-check the actor state regularly.
                msg = await asyncio.wait_for(self._mailbox.get(), timeout=1.0)
                try:
                    if msg.type not in noise:
                        self.metrics.messages_processed += 1
                    await self._dispatch(msg)
                finally:
                    # Balance every get() even when the handler raises, so
                    # that Queue.join() on the mailbox cannot hang.
                    self._mailbox.task_done()

            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.metrics.errors += 1
                logger.error(f"[{self.name}] Error in message loop: {e}", exc_info=True)

    async def _dispatch(self, msg: "Message"):
        """Dispatch message to the appropriate handler, else to ``handle_message``."""
        handler = self._handlers.get(msg.type)
        if handler:
            await handler(msg)
        else:
            await self.handle_message(msg)

    def _setup_default_handlers(self):
        """Install the built-in lifecycle/status handlers, keyed by message type."""
        self._handlers = {
            MessageType.STOP: self._handle_stop,
            MessageType.PAUSE: self._handle_pause,
            MessageType.RESUME: self._handle_resume,
            MessageType.STATUS_REQUEST: self._handle_status_request,
            MessageType.HEARTBEAT: self._handle_heartbeat_msg,
        }

    async def _handle_stop(self, msg: "Message"):
        await self.stop()

    async def _handle_pause(self, msg: "Message"):
        await self.pause()

    async def _handle_resume(self, msg: "Message"):
        await self.resume()

    async def _handle_status_request(self, msg: "Message"):
        """Answer a status request with ``get_status()``."""
        status = self.get_status()
        # reply_to, when set, overrides the sender as the reply target
        target = msg.reply_to or msg.sender_id
        if target:
            await self.send(target, MessageType.STATUS_RESPONSE, status)

    async def _handle_heartbeat_msg(self, msg: "Message"):
        pass  # Monitor actor handles these

    # ─── Heartbeat ────────────────────────────────────────────────────────────

    async def _heartbeat_loop(self, interval: float = 10.0):
        """Periodically publish heartbeat and metrics via MQTT.

        Args:
            interval: Seconds between publications after the initial one.
        """
        # Publish (almost) immediately on start so monitor sees agent right away
        await asyncio.sleep(0.5)
        await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", self._build_heartbeat())
        await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                await asyncio.sleep(interval)
                hb = self._build_heartbeat()
                self.metrics.last_heartbeat = time.time()
                await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", hb)
                await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.warning(f"[{self.name}] Heartbeat error: {e}")

    def _build_heartbeat(self) -> dict:
        """Build the heartbeat payload (identity, state, process CPU/memory, task)."""
        proc = psutil.Process()
        return {
            "actor_id":  self.actor_id,
            "name":      self.name,
            "timestamp": time.time(),
            "state":     self.state.value,
            # NOTE(review): a non-zero interval makes psutil block for that
            # long; this is called from coroutines, so it stalls the event
            # loop ~0.1 s per heartbeat. Kept as-is to preserve the readings.
            "cpu":       proc.cpu_percent(interval=0.1),
            "memory_mb": proc.memory_info().rss / 1024 / 1024,
            "task":      self._current_task_description(),
            "protected": self.protected,
        }

    def _build_metrics(self) -> dict:
        """Build the metrics payload from ``self.metrics``."""
        return {
            "actor_id": self.actor_id,
            "messages_processed": self.metrics.messages_processed,
            "errors": self.metrics.errors,
            "uptime": self.metrics.uptime,
            "tasks_completed": self.metrics.tasks_completed,
            "tasks_failed": self.metrics.tasks_failed,
            "restart_count": self.metrics.restart_count,
        }

    async def _command_listener(self):
        """Listen for commands published to agents/{id}/commands via MQTT.

        Understands ``stop``, ``pause``, ``resume`` and ``delete``. Protected
        actors ignore stop/pause/delete. Returns immediately when ``aiomqtt``
        is not installed; reconnects after 5 s when the connection fails.
        """
        try:
            import aiomqtt
        except ImportError:
            return

        topic = f"agents/{self.actor_id}/commands"
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
                    await client.subscribe(topic)
                    logger.debug(f"[{self.name}] Subscribed to {topic}")
                    async for message in client.messages:
                        try:
                            data    = json.loads(message.payload.decode())
                            command = data.get("command", "")
                            logger.info(f"[{self.name}] Received command: {command}")
                            if self.protected and command in ("stop", "pause", "delete"):
                                logger.warning(f"[{self.name}] Ignoring '{command}' — actor is protected.")
                                continue
                            if command == "stop":
                                await self.stop()
                                return
                            elif command == "pause":
                                await self.pause()
                            elif command == "resume":
                                await self.resume()
                            elif command == "delete":
                                # If main actor knows about this agent, remove from spawn registry
                                if self._registry:
                                    main = self._registry.find_by_name("main")
                                    if main and hasattr(main, "_remove_from_spawn_registry"):
                                        main._remove_from_spawn_registry(self.name)
                                    await self._registry.unregister(self.actor_id)
                                await self.stop()
                                return
                        except Exception as e:
                            logger.error(f"[{self.name}] Command parse error: {e}")
            except asyncio.CancelledError:
                break
            except Exception as e:
                if self.state not in (ActorState.STOPPED, ActorState.FAILED):
                    # Best-effort reconnect; leave a trace instead of failing silently.
                    logger.debug(f"[{self.name}] Command listener error, retrying in 5s: {e}")
                    await asyncio.sleep(5)

    def _current_task_description(self) -> str:
        return "idle"  # Override in subclasses

    # ─── Messaging ────────────────────────────────────────────────────────────

    async def send(self, target_id: str, msg_type: "MessageType", payload: Any = None) -> bool:
        """Send a message to another actor.

        Returns:
            ``False`` when no registry is attached, otherwise whatever the
            registry's ``deliver`` returns.
        """
        if self._registry is None:
            logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
            return False
        msg = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
        return await self._registry.deliver(target_id, msg)

    async def broadcast(self, msg_type: "MessageType", payload: Any = None):
        """Broadcast to all registered actors (no-op without a registry)."""
        if self._registry:
            await self._registry.broadcast(self.actor_id, msg_type, payload)

    async def receive(self, msg: "Message"):
        """External entry point - put message in mailbox (waits while it is full)."""
        await self._mailbox.put(msg)

    # ─── Actor Spawning ───────────────────────────────────────────────────────

    async def spawn(self, actor_class: type, **kwargs) -> "Actor":
        """
        Spawn a child actor. The child inherits:
        - MQTT client (so it can publish heartbeats/status)
        - Registry (so it can send/receive messages)
        - Persistence dir defaults to same root

        Args:
            actor_class: Actor subclass to instantiate.
            **kwargs: Forwarded to ``actor_class``.

        Returns:
            The started child actor.
        """
        # Default persistence to same root as parent
        kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))

        child = actor_class(**kwargs)

        # Inherit everything from parent
        child._mqtt_client  = self._mqtt_client   # MQTT publish connection
        child._mqtt_broker  = self._mqtt_broker   # broker address for command listener
        child._mqtt_port    = self._mqtt_port     # broker port
        child._registry     = self._registry      # message routing

        # Register in registry
        if self._registry:
            await self._registry.register(child)

        # Start the child
        await child.start()

        # Immediately announce to monitor - don't wait for heartbeat loop
        await child._publish_status()
        await child._mqtt_publish(
            f"agents/{child.actor_id}/heartbeat",
            child._build_heartbeat(),
        )
        await child._mqtt_publish(
            f"agents/{child.actor_id}/metrics",
            child._build_metrics(),
        )

        # Notify parent's topic that it spawned a child
        await self._mqtt_publish(
            f"agents/{self.actor_id}/spawned",
            {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
        )
        logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
        return child

    # ─── Persistence ──────────────────────────────────────────────────────────

    async def _save_persistent_state(self):
        """Pickle the persistent state to ``state.pkl``; failures are logged, not raised."""
        path = self._persistence_dir / "state.pkl"
        try:
            with open(path, "wb") as f:
                pickle.dump(self._persistent_state, f)
        except Exception as e:
            logger.error(f"[{self.name}] Failed to save state: {e}")

    async def _load_persistent_state(self):
        """Load ``state.pkl`` if it exists; failures are logged, not raised."""
        path = self._persistence_dir / "state.pkl"
        if path.exists():
            try:
                # NOTE(security): unpickling can execute arbitrary code, so the
                # persistence directory must only contain trusted files.
                with open(path, "rb") as f:
                    self._persistent_state = pickle.load(f)
                logger.info(f"[{self.name}] Loaded persistent state.")
            except Exception as e:
                logger.error(f"[{self.name}] Failed to load state: {e}")

    def persist(self, key: str, value: Any):
        """Store ``value`` under ``key`` and write the whole state to disk.

        The write is best-effort: a failure is logged at debug level and the
        in-memory value is kept.
        """
        self._persistent_state[key] = value
        # Write to disk immediately so state survives Ctrl+C and crashes.
        # (This used to be done twice in a row; one write is sufficient.)
        path = self._persistence_dir / "state.pkl"
        try:
            with open(path, "wb") as f:
                pickle.dump(self._persistent_state, f)
        except Exception as e:
            logger.debug(f"[{self.name}] persist write failed: {e}")

    def recall(self, key: str, default: Any = None) -> Any:
        """Return the persisted value for ``key``, or ``default`` when absent."""
        return self._persistent_state.get(key, default)

    # ─── MQTT ─────────────────────────────────────────────────────────────────

    async def _mqtt_publish(self, topic: str, payload: Any, retain: bool = False, qos: int = 0):
        """Publish ``payload`` as JSON; a no-op without a client, errors are only logged."""
        if self._mqtt_client:
            try:
                await self._mqtt_client.publish(topic, json.dumps(payload), retain=retain, qos=qos)
            except Exception as e:
                logger.debug(f"[{self.name}] MQTT publish failed: {e}")

    async def _publish_status(self):
        """Publish ``get_status()`` to ``agents/{id}/status``."""
        await self._mqtt_publish(f"agents/{self.actor_id}/status", self.get_status())

    # ─── Status ───────────────────────────────────────────────────────────────

    def get_status(self) -> dict:
        """Return a snapshot of identity, lifecycle state and key metrics."""
        return {
            "actor_id": self.actor_id,
            "name": self.name,
            "state": self.state.value,
            "uptime": self.metrics.uptime,
            "messages_processed": self.metrics.messages_processed,
            "restart_count": self.metrics.restart_count,
            "supervised": self.supervisor_id is not None,
        }

    # ─── Abstract / Override ──────────────────────────────────────────────────

    async def on_start(self):
        """Called when actor starts. Override for init logic."""
        pass

    async def publish_manifest(self, description: str = "", publishes: list = None,
                                capabilities: list = None, input_schema: dict = None,
                                output_schema: dict = None):
        """
        Publish a capability manifest so main's topic registry can discover this actor.
        Call from on_start() in any actor that wants to be discoverable.
        The manifest is published with ``retain=True`` to ``agents/{id}/manifest``.

        input_schema / output_schema — dicts describing expected payload fields, e.g.:
            input_schema  = {"city": "str — city name to fetch weather for"}
            output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}

        Omitted list/dict arguments are published as empty containers.
        """
        manifest = {
            "name":          self.name,
            "actor_id":      self.actor_id,
            "description":   description,
            "publishes":     publishes or [],
            "capabilities":  capabilities or [],
            "input_schema":  input_schema or {},
            "output_schema": output_schema or {},
            "timestamp":     time.time(),
        }
        await self._mqtt_publish(f"agents/{self.actor_id}/manifest", manifest, retain=True)

    async def on_stop(self):
        """Called when actor stops. Override for cleanup."""
        pass

    @abstractmethod
    async def handle_message(self, msg: "Message"):
        """Handle messages not caught by default handlers."""
        pass

    def __repr__(self):
        return f"<Actor name={self.name} id={self.actor_id[:8]} state={self.state.value}>"

Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.

name
state
metrics
protected: bool
supervisor_id: Optional[str]
async def start(self):
162    async def start(self):
163        """Start the actor's event loop."""
164        self.state = ActorState.RUNNING
165        self.metrics.start_time = time.time()
166        await self._load_persistent_state()
167        await self.on_start()
168        self._tasks.append(asyncio.create_task(self._message_loop()))
169        self._tasks.append(asyncio.create_task(self._heartbeat_loop()))
170        self._tasks.append(asyncio.create_task(self._command_listener()))
171        await self._publish_status()
172        logger.info(f"[{self.name}] Actor started.")

Start the actor's event loop.

async def stop(self):
174    async def stop(self):
175        """Gracefully stop the actor."""
176        self.state = ActorState.STOPPED
177        for task in self._tasks:
178            task.cancel()
179        await self.on_stop()                  # on_stop() calls persist() first
180        await self._save_persistent_state()   # THEN save to disk
181        await self._publish_status()
182        logger.info(f"[{self.name}] Actor stopped.")

Gracefully stop the actor.

async def pause(self):
184    async def pause(self):
185        self.state = ActorState.PAUSED
186        await self._publish_status()
async def resume(self):
188    async def resume(self):
189        self.state = ActorState.RUNNING
190        await self._publish_status()
async def send( self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
350    async def send(self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
351        """Send a message to another actor."""
352        if self._registry is None:
353            logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
354            return False
355        msg = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
356        return await self._registry.deliver(target_id, msg)

Send a message to another actor.

async def broadcast(self, msg_type: MessageType, payload: Any = None):
358    async def broadcast(self, msg_type: MessageType, payload: Any = None):
359        """Broadcast to all registered actors."""
360        if self._registry:
361            await self._registry.broadcast(self.actor_id, msg_type, payload)

Broadcast to all registered actors.

async def receive(self, msg: Message):
363    async def receive(self, msg: Message):
364        """External entry point - put message in mailbox."""
365        await self._mailbox.put(msg)

External entry point - put message in mailbox.

async def spawn(self, actor_class: type, **kwargs) -> Actor:
369    async def spawn(self, actor_class: type, **kwargs) -> "Actor":
370        """
371        Spawn a child actor. The child inherits:
372        - MQTT client (so it can publish heartbeats/status)
373        - Registry (so it can send/receive messages)
374        - Persistence dir defaults to same root
375        """
376        # Default persistence to same root as parent
377        kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))
378
379        child = actor_class(**kwargs)
380
381        # Inherit everything from parent
382        child._mqtt_client  = self._mqtt_client   # MQTT publish connection
383        child._mqtt_broker  = self._mqtt_broker   # broker address for command listener
384        child._mqtt_port    = self._mqtt_port     # broker port
385        child._registry     = self._registry      # message routing
386
387        # Register in registry
388        if self._registry:
389            await self._registry.register(child)
390
391        # Start the child
392        await child.start()
393
394        # Immediately announce to monitor - don't wait for heartbeat loop
395        await child._publish_status()
396        await child._mqtt_publish(
397            f"agents/{child.actor_id}/heartbeat",
398            child._build_heartbeat(),
399        )
400        await child._mqtt_publish(
401            f"agents/{child.actor_id}/metrics",
402            child._build_metrics(),
403        )
404
405        # Notify parent's topic that it spawned a child
406        await self._mqtt_publish(
407            f"agents/{self.actor_id}/spawned",
408            {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
409        )
410        logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
411        return child

Spawn a child actor. The child inherits:

  • MQTT client (so it can publish heartbeats/status)
  • Registry (so it can send/receive messages)
  • Persistence dir defaults to same root
def persist(self, key: str, value: Any):
433    def persist(self, key: str, value: Any):
434        self._persistent_state[key] = value
435        # Write to disk immediately so state survives Ctrl+C and crashes
436        path = self._persistence_dir / "state.pkl"
437        try:
438            with open(path, "wb") as f:
439                pickle.dump(self._persistent_state, f)
440        except Exception as e:
441            logger.debug(f"[{self.name}] persist write failed: {e}")
442        # Save to disk immediately so state survives crashes and Ctrl+C
443        path = self._persistence_dir / "state.pkl"
444        try:
445            import pickle as _pickle
446            with open(path, "wb") as f:
447                _pickle.dump(self._persistent_state, f)
448        except Exception as e:
449            logger.debug(f"[{self.name}] persist write failed: {e}")
def recall(self, key: str, default: Any = None) -> Any:
451    def recall(self, key: str, default: Any = None) -> Any:
452        return self._persistent_state.get(key, default)
def get_status(self) -> dict:
468    def get_status(self) -> dict:
469        return {
470            "actor_id": self.actor_id,
471            "name": self.name,
472            "state": self.state.value,
473            "uptime": self.metrics.uptime,
474            "messages_processed": self.metrics.messages_processed,
475            "restart_count": self.metrics.restart_count,
476            "supervised": self.supervisor_id is not None,
477        }
async def on_start(self):
481    async def on_start(self):
482        """Called when actor starts. Override for init logic."""
483        pass

Called when actor starts. Override for init logic.

async def publish_manifest( self, description: str = '', publishes: list = None, capabilities: list = None, input_schema: dict = None, output_schema: dict = None):
485    async def publish_manifest(self, description: str = "", publishes: list = None,
486                                capabilities: list = None, input_schema: dict = None,
487                                output_schema: dict = None):
488        """
489        Publish a capability manifest so main's topic registry can discover this actor.
490        Call from on_start() in any actor that wants to be discoverable.
491        Manifests are retained — main sees them immediately even after restart.
492
493        input_schema / output_schema — dicts describing expected payload fields, e.g.:
494            input_schema  = {"city": "str — city name to fetch weather for"}
495            output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
496        """
497        import time as _t
498        manifest = {
499            "name":          self.name,
500            "actor_id":      self.actor_id,
501            "description":   description,
502            "publishes":     publishes or [],
503            "capabilities":  capabilities or [],
504            "input_schema":  input_schema or {},
505            "output_schema": output_schema or {},
506            "timestamp":     _t.time(),
507        }
508        await self._mqtt_publish(f"agents/{self.actor_id}/manifest", manifest, retain=True)

Publish a capability manifest so main's topic registry can discover this actor. Call from on_start() in any actor that wants to be discoverable. Manifests are retained — main sees them immediately even after restart.

input_schema / output_schema — dicts describing expected payload fields, e.g.: input_schema = {"city": "str — city name to fetch weather for"} output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}

async def on_stop(self):
510    async def on_stop(self):
511        """Called when actor stops. Override for cleanup."""
512        pass

Called when actor stops. Override for cleanup.

@abstractmethod
async def handle_message(self, msg: Message):
514    @abstractmethod
515    async def handle_message(self, msg: Message):
516        """Handle messages not caught by default handlers."""
517        pass

Handle messages not caught by default handlers.

class ActorState(builtins.str, enum.Enum):
class ActorState(str, Enum):
    """Lifecycle states of an actor; each member is also its lowercase string value."""

    IDLE = "idle"          # initial state assigned in Actor.__init__
    RUNNING = "running"    # set by start() and resume()
    PAUSED = "paused"      # set by pause()
    STOPPED = "stopped"    # set by stop()
    FAILED = "failed"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

IDLE = <ActorState.IDLE: 'idle'>
RUNNING = <ActorState.RUNNING: 'running'>
PAUSED = <ActorState.PAUSED: 'paused'>
STOPPED = <ActorState.STOPPED: 'stopped'>
FAILED = <ActorState.FAILED: 'failed'>
@dataclass
class Message:
@dataclass
class Message:
    """Envelope exchanged between actors.

    ``message_id`` defaults to a fresh UUID4 string and ``timestamp`` to the
    creation time (``time.time()``).
    """

    type: MessageType
    sender_id: str
    payload: Any = None
    reply_to: Optional[str] = None
    message_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> dict:
        """Return the message as a plain dict, with ``type`` reduced to its value."""
        as_dict = {"type": self.type.value}
        # Remaining fields are copied unchanged, in declaration order.
        for attr in ("sender_id", "payload", "reply_to", "message_id", "timestamp"):
            as_dict[attr] = getattr(self, attr)
        return as_dict
Message( type: MessageType, sender_id: str, payload: Any = None, reply_to: Optional[str] = None, message_id: str = <factory>, timestamp: float = <factory>)
type: MessageType
sender_id: str
payload: Any = None
reply_to: Optional[str] = None
message_id: str
timestamp: float
def to_dict(self) -> dict:
77    def to_dict(self) -> dict:
78        return {
79            "type": self.type.value,
80            "sender_id": self.sender_id,
81            "payload": self.payload,
82            "reply_to": self.reply_to,
83            "message_id": self.message_id,
84            "timestamp": self.timestamp,
85        }
class MessageType(builtins.str, enum.Enum):
class MessageType(str, Enum):
    """Kinds of messages actors exchange; each member is also its string value."""

    # --- Lifecycle ---
    START = "start"
    STOP = "stop"
    PAUSE = "pause"
    RESUME = "resume"
    DELETE = "delete"

    # --- Communication ---
    TASK = "task"
    RESULT = "result"
    HEARTBEAT = "heartbeat"
    SPAWN = "spawn"

    # --- Internal ---
    TICK = "tick"
    STATUS_REQUEST = "status_request"
    STATUS_RESPONSE = "status_response"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

START = <MessageType.START: 'start'>
STOP = <MessageType.STOP: 'stop'>
PAUSE = <MessageType.PAUSE: 'pause'>
RESUME = <MessageType.RESUME: 'resume'>
DELETE = <MessageType.DELETE: 'delete'>
TASK = <MessageType.TASK: 'task'>
RESULT = <MessageType.RESULT: 'result'>
HEARTBEAT = <MessageType.HEARTBEAT: 'heartbeat'>
SPAWN = <MessageType.SPAWN: 'spawn'>
TICK = <MessageType.TICK: 'tick'>
STATUS_REQUEST = <MessageType.STATUS_REQUEST: 'status_request'>
STATUS_RESPONSE = <MessageType.STATUS_RESPONSE: 'status_response'>
class ActorSystem:
class ActorSystem:
    """Top-level orchestrator.

    Owns the ActorRegistry and the shared MQTT client, and starts, spawns and
    stops the actors that are registered with it.
    """

    def __init__(self, mqtt_broker: str = "localhost", mqtt_port: int = 1883):
        """Store the broker address; the MQTT client itself is created in start()."""
        self.registry     = ActorRegistry()
        self._mqtt_broker = mqtt_broker
        self._mqtt_port   = mqtt_port
        self._mqtt_client = None
        self._running     = False
        self._supervisor: Optional[Supervisor] = None

    def _inject(self, actor: Actor):
        """Inject MQTT client + broker/port into an actor so it can publish and subscribe."""
        actor._mqtt_client = self._mqtt_client
        actor._mqtt_broker = self._mqtt_broker
        actor._mqtt_port   = self._mqtt_port

    @property
    def supervisor(self) -> Supervisor:
        """Lazy-create the Supervisor bound to this system's registry and inject function."""
        if self._supervisor is None:
            self._supervisor = Supervisor(self.registry, self._inject)
        return self._supervisor

    async def start(self, *initial_actors: Actor):
        """Create the MQTT client, then inject, register and start each given actor in order."""
        self._running = True
        self._mqtt_client = await _MQTTPublisher.create(self._mqtt_broker, self._mqtt_port)

        for actor in initial_actors:
            self._inject(actor)
            await self.registry.register(actor)
            await actor.start()

        logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")

    async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
        """Spawn and register a new actor in the system."""
        actor = actor_class(**kwargs)
        self._inject(actor)
        await self.registry.register(actor)
        await actor.start()
        return actor

    async def stop_all(self):
        """Stop the supervisor (if one was created), every registered actor, then the MQTT client.

        Actors are stopped concurrently.  A failure in one actor's stop() does
        not prevent the others from stopping; each failure is logged.
        """
        self._running = False
        # Stop supervisor first so it doesn't try to restart actors we're about to stop
        if self._supervisor:
            await self._supervisor.stop()
        actors = self.registry.all_actors()
        outcomes = await asyncio.gather(*[a.stop() for a in actors], return_exceptions=True)
        # With return_exceptions=True failures come back as values (in input
        # order); report them instead of silently dropping them.
        for actor, outcome in zip(actors, outcomes):
            if isinstance(outcome, BaseException):
                logger.error(f"[ActorSystem] Failed to stop {actor.name}: {outcome!r}")
        if self._mqtt_client:
            await self._mqtt_client.disconnect()
        logger.info("[ActorSystem] All actors stopped.")

    async def run_forever(self):
        """Idle in one-second sleeps while running; on Ctrl-C or cancellation, stop everything."""
        try:
            while self._running:
                await asyncio.sleep(1)
        except (KeyboardInterrupt, asyncio.CancelledError):
            logger.info("[ActorSystem] Shutdown signal received.")
            await self.stop_all()

Top-level orchestrator.

ActorSystem(mqtt_broker: str = 'localhost', mqtt_port: int = 1883)
347    def __init__(self, mqtt_broker: str = "localhost", mqtt_port: int = 1883):
348        self.registry     = ActorRegistry()
349        self._mqtt_broker = mqtt_broker
350        self._mqtt_port   = mqtt_port
351        self._mqtt_client = None
352        self._running     = False
353        self._supervisor: Optional[Supervisor] = None
registry
supervisor: wactorz.core.registry.Supervisor
361    @property
362    def supervisor(self) -> Supervisor:
363        """Lazy-create the Supervisor bound to this system's registry and inject function."""
364        if self._supervisor is None:
365            self._supervisor = Supervisor(self.registry, self._inject)
366        return self._supervisor

Lazy-create the Supervisor bound to this system's registry and inject function.

async def start(self, *initial_actors: Actor):
368    async def start(self, *initial_actors: Actor):
369        self._running = True
370        self._mqtt_client = await _MQTTPublisher.create(self._mqtt_broker, self._mqtt_port)
371
372        for actor in initial_actors:
373            self._inject(actor)
374            await self.registry.register(actor)
375            await actor.start()
376
377        logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")
async def spawn( self, actor_class: Type[Actor], **kwargs) -> Actor:
379    async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
380        """Spawn and register a new actor in the system."""
381        actor = actor_class(**kwargs)
382        self._inject(actor)
383        await self.registry.register(actor)
384        await actor.start()
385        return actor

Spawn and register a new actor in the system.

async def stop_all(self):
387    async def stop_all(self):
388        self._running = False
389        # Stop supervisor first so it doesn't try to restart actors we're about to stop
390        if self._supervisor:
391            await self._supervisor.stop()
392        actors = self.registry.all_actors()
393        await asyncio.gather(*[a.stop() for a in actors], return_exceptions=True)
394        if self._mqtt_client:
395            await self._mqtt_client.disconnect()
396        logger.info("[ActorSystem] All actors stopped.")
async def run_forever(self):
398    async def run_forever(self):
399        try:
400            while self._running:
401                await asyncio.sleep(1)
402        except (KeyboardInterrupt, asyncio.CancelledError):
403            logger.info("[ActorSystem] Shutdown signal received.")
404            await self.stop_all()
class ActorRegistry:
 61class ActorRegistry:
 62    """Maintains a map of all living actors and routes messages between them."""
 63
 64    def __init__(self):
 65        self._actors: dict[str, Actor] = {}
 66        self._lock = asyncio.Lock()
 67
 68    async def register(self, actor: Actor):
 69        async with self._lock:
 70            actor._registry = self
 71            self._actors[actor.actor_id] = actor
 72            logger.info(f"[Registry] Registered {actor.name} ({actor.actor_id[:8]})")
 73
 74    async def unregister(self, actor_id: str):
 75        async with self._lock:
 76            if actor_id in self._actors:
 77                del self._actors[actor_id]
 78                logger.info(f"[Registry] Unregistered {actor_id[:8]}")
 79
 80    async def deliver(self, target_id: str, msg: Message) -> bool:
 81        actor = self._actors.get(target_id)
 82        if actor is None:
 83            logger.warning(f"[Registry] Unknown target: {target_id[:8]}")
 84            return False
 85        await actor.receive(msg)
 86        return True
 87
 88    async def broadcast(self, sender_id: str, msg_type: MessageType, payload=None):
 89        msg = Message(type=msg_type, sender_id=sender_id, payload=payload)
 90        for actor_id, actor in list(self._actors.items()):
 91            if actor_id != sender_id:
 92                await actor.receive(msg)
 93
 94    def get(self, actor_id: str) -> Optional[Actor]:
 95        return self._actors.get(actor_id)
 96
 97    def all_actors(self) -> list[Actor]:
 98        return list(self._actors.values())
 99
100    def find_by_name(self, name: str) -> Optional[Actor]:
101        for actor in self._actors.values():
102            if actor.name == name:
103                return actor
104        return None
105
106    def __len__(self):
107        return len(self._actors)

Maintains a map of all living actors and routes messages between them.

async def register(self, actor: Actor):
68    async def register(self, actor: Actor):
69        async with self._lock:
70            actor._registry = self
71            self._actors[actor.actor_id] = actor
72            logger.info(f"[Registry] Registered {actor.name} ({actor.actor_id[:8]})")
async def unregister(self, actor_id: str):
74    async def unregister(self, actor_id: str):
75        async with self._lock:
76            if actor_id in self._actors:
77                del self._actors[actor_id]
78                logger.info(f"[Registry] Unregistered {actor_id[:8]}")
async def deliver(self, target_id: str, msg: Message) -> bool:
80    async def deliver(self, target_id: str, msg: Message) -> bool:
81        actor = self._actors.get(target_id)
82        if actor is None:
83            logger.warning(f"[Registry] Unknown target: {target_id[:8]}")
84            return False
85        await actor.receive(msg)
86        return True
async def broadcast( self, sender_id: str, msg_type: MessageType, payload=None):
88    async def broadcast(self, sender_id: str, msg_type: MessageType, payload=None):
89        msg = Message(type=msg_type, sender_id=sender_id, payload=payload)
90        for actor_id, actor in list(self._actors.items()):
91            if actor_id != sender_id:
92                await actor.receive(msg)
def get(self, actor_id: str) -> Optional[Actor]:
94    def get(self, actor_id: str) -> Optional[Actor]:
95        return self._actors.get(actor_id)
def all_actors(self) -> list[Actor]:
97    def all_actors(self) -> list[Actor]:
98        return list(self._actors.values())
def find_by_name(self, name: str) -> Optional[Actor]:
100    def find_by_name(self, name: str) -> Optional[Actor]:
101        for actor in self._actors.values():
102            if actor.name == name:
103                return actor
104        return None
class LLMAgent(wactorz.Actor):
class LLMAgent(Actor):
    """
    An Actor that uses an LLM to process tasks.
    Maintains conversation history and supports tool use.
    """

    def __init__(
        self,
        llm_provider: Optional[LLMProvider] = None,
        system_prompt: str = "You are a helpful AI agent.",
        max_history: int = 20,
        summarize_threshold: int = 30,
        **kwargs,
    ):
        """
        Args:
            llm_provider: Provider used for completions; with None no LLM calls are made.
            system_prompt: System prompt sent with every completion.
            max_history: How many of the most recent history messages are sent to the LLM.
            summarize_threshold: History length at which older messages get compressed.
            **kwargs: Forwarded unchanged to Actor.__init__.
        """
        super().__init__(**kwargs)
        self.llm = llm_provider
        self.system_prompt = system_prompt
        self.max_history = max_history
        self.summarize_threshold = summarize_threshold  # compress once history reaches this length
        self._conversation_history: list[dict] = []
        self._history_summary: str = ""   # rolling summary of compressed messages
        self._current_task = "idle"
        # Cost / token tracking — must be set here so subclasses (MainActor etc.) inherit them
        self.total_input_tokens  = 0
        self.total_output_tokens = 0
        self.total_cost_usd      = 0.0

    def _current_task_description(self) -> str:
        """Return the short label of the task in progress ("idle" when there is none)."""
        return self._current_task

    async def on_start(self):
        """Restore persisted history and summary, then publish the capability manifest."""
        # Restore conversation history and rolling summary from persistence,
        # keeping only well-formed, non-blank user/assistant entries
        saved = self.recall("conversation_history", [])
        clean = []
        for m in saved:
            if not isinstance(m, dict):
                continue
            role    = m.get("role", "")
            content = m.get("content", "")
            if role not in ("user", "assistant"):
                continue
            if not isinstance(content, str):
                content = str(content)
            if content.strip():
                clean.append({"role": role, "content": content})
        self._conversation_history = clean[-self.max_history:]
        self._history_summary = self.recall("history_summary", "")

        # Publish capability manifest so main's topic registry knows this agent exists.
        # Description falls back from DESCRIPTION to the first docstring line to the actor name.
        description = (
            getattr(self, "DESCRIPTION", None)
            or (self.__class__.__doc__ or "").strip().split("\n")[0]
            or self.name
        )
        capabilities  = getattr(self, "CAPABILITIES", [])
        input_schema  = getattr(self, "INPUT_SCHEMA",  {})
        output_schema = getattr(self, "OUTPUT_SCHEMA", {})
        await self.publish_manifest(
            description=description,
            capabilities=capabilities,
            input_schema=input_schema,
            output_schema=output_schema,
        )

    async def on_stop(self):
        """Persist the conversation history and the rolling summary."""
        self.persist("conversation_history", self._conversation_history)
        self.persist("history_summary", self._history_summary)

    async def _maybe_summarize(self):
        """
        Once history reaches summarize_threshold messages, compress the older
        half into the rolling summary and keep the recent half.  The summary is
        prepended as context by _build_messages_with_summary when sending to
        the LLM.

        Without an LLM, or when the summarization call fails, the history is
        simply truncated to the most recent max_history messages.
        """
        if len(self._conversation_history) < self.summarize_threshold:
            return
        if self.llm is None:
            # No LLM — just truncate
            self._conversation_history = self._conversation_history[-self.max_history:]
            return

        # Split: compress the older half, keep the recent half
        split = len(self._conversation_history) // 2
        to_compress = self._conversation_history[:split]
        to_keep     = self._conversation_history[split:]

        # Build compression prompt (each message is clipped to 400 characters)
        prior_summary = f"Previous summary:\n{self._history_summary}\n\n" if self._history_summary else ""
        messages_text = "\n".join(
            f"{m['role'].upper()}: {m['content'][:400]}"
            for m in to_compress
        )
        prompt = (
            f"{prior_summary}"
            f"Summarize the following conversation segment concisely. "
            f"Preserve: key facts, decisions, user preferences, entity names, URLs, credentials, "
            f"any technical details mentioned. Be specific, not vague.\n\n"
            f"{messages_text}"
        )
        try:
            summary, usage = await self.llm.complete(
                messages=[{"role": "user", "content": prompt}],
                system="You are a conversation summarizer. Output a dense, factual summary. No preamble.",
                max_tokens=400,
            )
            self._record_usage(usage)
            self._history_summary = summary.strip()
            self._conversation_history = to_keep
            self.persist("history_summary", self._history_summary)
            self.persist("conversation_history", self._conversation_history)
            logger.info(f"[{self.name}] History summarized: {len(to_compress)} messages → summary ({len(summary)} chars), keeping {len(to_keep)}")
        except Exception as e:
            logger.warning(f"[{self.name}] Summarization failed: {e} — truncating instead")
            self._conversation_history = self._conversation_history[-self.max_history:]

    def _build_messages_with_summary(self, n: int) -> list[dict]:
        """
        Build the message list to send to the LLM: the last ``n`` history
        messages, preceded by the rolling summary as context if one exists.
        """
        recent = self._conversation_history[-n:]
        if not self._history_summary:
            return recent
        # Inject summary as a user/assistant exchange so it fits the messages format
        summary_ctx = [{
            "role": "user",
            "content": f"[Context from earlier in our conversation]\n{self._history_summary}"
        }, {
            "role": "assistant",
            "content": "Understood, I have that context."
        }]
        return summary_ctx + recent

    def _safe_history(self) -> list[dict]:
        """Return the summary-aware context reduced to user/assistant dicts with string content."""
        return [
            {"role": m["role"], "content": str(m["content"])}
            for m in self._build_messages_with_summary(self.max_history)
            if isinstance(m, dict)
            and m.get("role") in ("user", "assistant")
            and m.get("content") is not None
        ]

    def _record_usage(self, usage: dict) -> None:
        """Add one provider usage dict to the running token and cost totals."""
        self.total_input_tokens  += usage.get("input_tokens", 0)
        self.total_output_tokens += usage.get("output_tokens", 0)
        self.total_cost_usd      += usage.get("cost_usd", 0.0)

    async def handle_message(self, msg: Message):
        """Route TASK messages to _handle_task; every other type is ignored here."""
        if msg.type == MessageType.TASK:
            await self._handle_task(msg)

    async def _handle_task(self, msg: Message):
        """
        Run one TASK message through the LLM and send the answer back.

        The task text is taken from the payload, appended to the history and
        completed with the same summary-aware context that chat() uses.  On
        success the result is published on ``agents/<actor_id>/completed`` and
        sent as a RESULT message to the requester.  On failure the error is
        logged and counted in ``metrics.tasks_failed``; nothing is raised.
        """
        if isinstance(msg.payload, dict):
            # Accept "text", "task", "message" or "query"; otherwise fall back to str(payload)
            task_text = (
                msg.payload.get("text")
                or msg.payload.get("task")
                or msg.payload.get("message")
                or msg.payload.get("query")
                or str(msg.payload)
            )
        else:
            task_text = msg.payload if msg.payload is not None else ""
        if not isinstance(task_text, str):
            # Payload fields are untyped; the slicing below needs a string
            task_text = str(task_text)

        if self.llm is None:
            # Checked before _current_task is set so the label never sticks on a task that did not run
            logger.warning(f"[{self.name}] No LLM provider configured.")
            return

        self._current_task = task_text[:60]
        start = time.time()
        try:
            self._conversation_history.append({"role": "user", "content": task_text})

            response, usage = await self.llm.complete(
                messages=self._safe_history(),
                system=self.system_prompt,
            )
            # Task traffic counts toward the same totals as chat()
            self._record_usage(usage)

            self._conversation_history.append({"role": "assistant", "content": response})
            self.metrics.tasks_completed += 1
            duration = time.time() - start

            # Keep the history bounded, then persist after each exchange
            await self._maybe_summarize()
            self.persist("conversation_history", self._conversation_history)

            # Publish completion
            await self._mqtt_publish(
                f"agents/{self.actor_id}/completed",
                {
                    "result_preview": response[:200],
                    "duration": duration,
                    "task": task_text[:60],
                },
            )

            # Reply to sender — echo _task_id so send_to() futures resolve
            payload_dict = msg.payload if isinstance(msg.payload, dict) else {}
            task_id  = payload_dict.get("_task_id")
            reply_to = payload_dict.get("_reply_to") or msg.reply_to or msg.sender_id
            if reply_to:
                result = {"text": response, "task": task_text, "duration": duration}
                if task_id:
                    result["_task_id"] = task_id
                await self.send(reply_to, MessageType.RESULT, result)

        except Exception as e:
            self.metrics.tasks_failed += 1
            self.state_value = "failed_task"
            logger.error(f"[{self.name}] LLM task failed: {e}", exc_info=True)

        finally:
            self._current_task = "idle"

    async def chat(self, user_message: str) -> str:
        """Direct async call - useful for the main conversation actor.

        Appends the exchange to the history, summarizes if needed, persists,
        updates the usage totals and publishes metrics.  Returns the LLM's
        reply, or "[No LLM configured]" when there is no provider.
        """
        if self.llm is None:
            return "[No LLM configured]"

        self.metrics.messages_processed += 1
        self._conversation_history.append({"role": "user", "content": user_message})

        response, usage = await self.llm.complete(
            messages=self._safe_history(),
            system=self.system_prompt,
        )
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        # Accumulate token usage and cost
        self._record_usage(usage)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )
        return response

    async def chat_stream(self, user_message: str):
        """
        Streaming version of chat(). Yields text chunks, then a final usage dict.
        The caller is responsible for printing chunks as they arrive.

        When there is no provider, or the provider has no ``stream`` attribute,
        this falls back to chat() and yields the whole response as one chunk
        (no usage dict follows in that case).

        Usage:
            async for chunk in agent.chat_stream("hello"):
                if isinstance(chunk, dict):
                    usage = chunk   # final usage summary
                else:
                    print(chunk, end="", flush=True)
        """
        if self.llm is None or not hasattr(self.llm, "stream"):
            # Fallback: non-streaming — yield whole response as single chunk
            response = await self.chat(user_message)
            yield response
            return

        self.metrics.messages_processed += 1
        self._conversation_history.append({"role": "user", "content": user_message})

        full_text = []
        usage     = {}

        async for chunk in self.llm.stream(
            messages=self._safe_history(),
            system=self.system_prompt,
        ):
            # Providers interleave text chunks with a usage dict; forward only the text
            if isinstance(chunk, dict):
                usage = chunk
            else:
                full_text.append(chunk)
                yield chunk

        response = "".join(full_text)
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        self._record_usage(usage)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )

        # Yield final usage dict so caller can log it
        yield usage

    def _build_metrics(self) -> dict:
        """Extend the base metrics with token counts and cost (rounded to 6 decimals)."""
        m = super()._build_metrics()
        m["input_tokens"]  = self.total_input_tokens
        m["output_tokens"] = self.total_output_tokens
        m["cost_usd"]      = round(self.total_cost_usd, 6)
        return m

    def clear_history(self):
        """Forget the conversation: drop the stored messages and the rolling summary."""
        self._conversation_history = []
        # The summary is injected into every prompt, so it has to go as well
        self._history_summary = ""

An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.

LLMAgent( llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, system_prompt: str = 'You are a helpful AI agent.', max_history: int = 20, summarize_threshold: int = 30, **kwargs)
418    def __init__(
419        self,
420        llm_provider: Optional[LLMProvider] = None,
421        system_prompt: str = "You are a helpful AI agent.",
422        max_history: int = 20,
423        summarize_threshold: int = 30,
424        **kwargs,
425    ):
426        super().__init__(**kwargs)
427        self.llm = llm_provider
428        self.system_prompt = system_prompt
429        self.max_history = max_history
430        self.summarize_threshold = summarize_threshold  # compress when history exceeds this
431        self._conversation_history: list[dict] = []
432        self._history_summary: str = ""   # rolling summary of compressed messages
433        self._current_task = "idle"
434        # Cost / token tracking — must be set here so subclasses (MainActor etc.) inherit them
435        self.total_input_tokens  = 0
436        self.total_output_tokens = 0
437        self.total_cost_usd      = 0.0
llm
system_prompt
max_history
summarize_threshold
total_input_tokens
total_output_tokens
total_cost_usd
async def on_start(self):
442    async def on_start(self):
443        # Restore conversation history and rolling summary from persistence
444        saved = self.recall("conversation_history", [])
445        clean = []
446        for m in saved:
447            if not isinstance(m, dict):
448                continue
449            role    = m.get("role", "")
450            content = m.get("content", "")
451            if role not in ("user", "assistant"):
452                continue
453            if not isinstance(content, str):
454                content = str(content)
455            if content.strip():
456                clean.append({"role": role, "content": content})
457        self._conversation_history = clean[-self.max_history:]
458        self._history_summary = self.recall("history_summary", "")
459
460        # Publish capability manifest so main's topic registry knows this agent exists
461        description = (
462            getattr(self, "DESCRIPTION", None)
463            or (self.__class__.__doc__ or "").strip().split("\n")[0]
464            or self.name
465        )
466        capabilities  = getattr(self, "CAPABILITIES", [])
467        input_schema  = getattr(self, "INPUT_SCHEMA",  {})
468        output_schema = getattr(self, "OUTPUT_SCHEMA", {})
469        await self.publish_manifest(
470            description=description,
471            capabilities=capabilities,
472            input_schema=input_schema,
473            output_schema=output_schema,
474        )

Called when actor starts. Override for init logic.

async def on_stop(self):
476    async def on_stop(self):
477        self.persist("conversation_history", self._conversation_history)
478        self.persist("history_summary", self._history_summary)

Called when actor stops. Override for cleanup.

async def handle_message(self, msg: Message):
548    async def handle_message(self, msg: Message):
549        if msg.type == MessageType.TASK:
550            await self._handle_task(msg)

Handle messages not caught by default handlers.

async def chat(self, user_message: str) -> str:
614    async def chat(self, user_message: str) -> str:
615        """Direct async call - useful for the main conversation actor."""
616        if self.llm is None:
617            return "[No LLM configured]"
618
619        self.metrics.messages_processed += 1
620        self._conversation_history.append({"role": "user", "content": user_message})
621
622        safe_history = [
623            {"role": m["role"], "content": str(m["content"])}
624            for m in self._build_messages_with_summary(self.max_history)
625            if isinstance(m, dict)
626            and m.get("role") in ("user", "assistant")
627            and m.get("content") is not None
628        ]
629        response, usage = await self.llm.complete(
630            messages=safe_history,
631            system=self.system_prompt,
632        )
633        self._conversation_history.append({"role": "assistant", "content": response})
634        await self._maybe_summarize()
635        self.persist("conversation_history", self._conversation_history)
636
637        # Accumulate token usage and cost
638        self.total_input_tokens  += usage.get("input_tokens", 0)
639        self.total_output_tokens += usage.get("output_tokens", 0)
640        self.total_cost_usd      += usage.get("cost_usd", 0.0)
641
642        await self._mqtt_publish(
643            f"agents/{self.actor_id}/metrics",
644            self._build_metrics(),
645        )
646        return response

Direct async call - useful for the main conversation actor.

async def chat_stream(self, user_message: str):
648    async def chat_stream(self, user_message: str):
649        """
650        Streaming version of chat(). Yields text chunks, then a final usage dict.
651        The caller is responsible for printing chunks as they arrive.
652
653        Usage:
654            async for chunk in agent.chat_stream("hello"):
655                if isinstance(chunk, dict):
656                    usage = chunk   # final usage summary
657                else:
658                    print(chunk, end="", flush=True)
659        """
660        if self.llm is None or not hasattr(self.llm, "stream"):
661            # Fallback: non-streaming — yield whole response as single chunk
662            response = await self.chat(user_message)
663            yield response
664            return
665
666        self.metrics.messages_processed += 1
667        self._conversation_history.append({"role": "user", "content": user_message})
668
669        full_text = []
670        usage     = {}
671
672        safe_history = [
673            {"role": m["role"], "content": str(m["content"])}
674            for m in self._build_messages_with_summary(self.max_history)
675            if isinstance(m, dict)
676            and m.get("role") in ("user", "assistant")
677            and m.get("content") is not None
678        ]
679        async for chunk in self.llm.stream(
680            messages=safe_history,
681            system=self.system_prompt,
682        ):
683            if isinstance(chunk, dict):
684                usage = chunk
685            else:
686                full_text.append(chunk)
687                yield chunk
688
689        response = "".join(full_text)
690        self._conversation_history.append({"role": "assistant", "content": response})
691        await self._maybe_summarize()
692        self.persist("conversation_history", self._conversation_history)
693
694        self.total_input_tokens  += usage.get("input_tokens", 0)
695        self.total_output_tokens += usage.get("output_tokens", 0)
696        self.total_cost_usd      += usage.get("cost_usd", 0.0)
697
698        await self._mqtt_publish(
699            f"agents/{self.actor_id}/metrics",
700            self._build_metrics(),
701        )
702
703        # Yield final usage dict so caller can log it
704        yield usage

Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.

Usage:

    async for chunk in agent.chat_stream("hello"):
        if isinstance(chunk, dict):
            usage = chunk   # final usage summary
        else:
            print(chunk, end="", flush=True)

def clear_history(self):
713    def clear_history(self):
714        self._conversation_history = []
class AnthropicProvider(wactorz.agents.llm_agent.LLMProvider):
 64class AnthropicProvider(LLMProvider):
 65    def __init__(self, model: str = "claude-sonnet-4-6", api_key: Optional[str] = None):
 66        import anthropic
 67        self.client = anthropic.AsyncAnthropic(api_key=api_key)
 68        self.model = model
 69
 70    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
 71        response = await self.client.messages.create(
 72            model=self.model,
 73            max_tokens=kwargs.get("max_tokens", 4096),
 74            system=system,
 75            messages=messages,
 76        )
 77        text = response.content[0].text
 78        usage = {
 79            "input_tokens":  response.usage.input_tokens,
 80            "output_tokens": response.usage.output_tokens,
 81            "cost_usd":      _calc_cost(self.model,
 82                                        response.usage.input_tokens,
 83                                        response.usage.output_tokens),
 84        }
 85        return text, usage
 86
 87    async def stream(self, messages: list[dict], system: str = "", **kwargs):
 88        """Yield text chunks as they arrive. Final item is a dict with usage."""
 89        input_tokens = output_tokens = 0
 90        async with self.client.messages.stream(
 91            model=self.model,
 92            max_tokens=kwargs.get("max_tokens", 4096),
 93            system=system,
 94            messages=messages,
 95        ) as s:
 96            async for chunk in s.text_stream:
 97                yield chunk
 98            # Final message has usage counts
 99            final = await s.get_final_message()
100            input_tokens  = final.usage.input_tokens
101            output_tokens = final.usage.output_tokens
102        yield {
103            "input_tokens":  input_tokens,
104            "output_tokens": output_tokens,
105            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
106        }

Base class for LLM providers.

AnthropicProvider(model: str = 'claude-sonnet-4-6', api_key: Optional[str] = None)
65    def __init__(self, model: str = "claude-sonnet-4-6", api_key: Optional[str] = None):
66        import anthropic
67        self.client = anthropic.AsyncAnthropic(api_key=api_key)
68        self.model = model
client
model
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
70    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
71        response = await self.client.messages.create(
72            model=self.model,
73            max_tokens=kwargs.get("max_tokens", 4096),
74            system=system,
75            messages=messages,
76        )
77        text = response.content[0].text
78        usage = {
79            "input_tokens":  response.usage.input_tokens,
80            "output_tokens": response.usage.output_tokens,
81            "cost_usd":      _calc_cost(self.model,
82                                        response.usage.input_tokens,
83                                        response.usage.output_tokens),
84        }
85        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
 87    async def stream(self, messages: list[dict], system: str = "", **kwargs):
 88        """Yield text chunks as they arrive. Final item is a dict with usage."""
 89        input_tokens = output_tokens = 0
 90        async with self.client.messages.stream(
 91            model=self.model,
 92            max_tokens=kwargs.get("max_tokens", 4096),
 93            system=system,
 94            messages=messages,
 95        ) as s:
 96            async for chunk in s.text_stream:
 97                yield chunk
 98            # Final message has usage counts
 99            final = await s.get_final_message()
100            input_tokens  = final.usage.input_tokens
101            output_tokens = final.usage.output_tokens
102        yield {
103            "input_tokens":  input_tokens,
104            "output_tokens": output_tokens,
105            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
106        }

Yield text chunks as they arrive. Final item is a dict with usage.

class OpenAIProvider(wactorz.agents.llm_agent.LLMProvider):
class OpenAIProvider(LLMProvider):
    """LLM provider that talks to the OpenAI chat-completions endpoint."""

    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
        """Create the async OpenAI client and remember the model name."""
        import openai
        self.client = openai.AsyncOpenAI(api_key=api_key)
        self.model = model

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """Run one non-streaming chat completion.

        Args:
            messages: Conversation turns; a system message is prepended when
                ``system`` is non-empty.
            system: Optional system prompt.
            **kwargs: Only ``max_tokens`` is read (default 4096) and sent as
                ``max_completion_tokens``.

        Returns:
            ``(text, usage)`` where ``usage`` has ``input_tokens``,
            ``output_tokens`` and ``cost_usd``.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_completion_tokens=kwargs.get("max_tokens", 4096),
        )
        # message.content may be None; keep the declared str return type.
        text = response.choices[0].message.content or ""
        # usage may be absent; guard it the same way NIMProvider.complete does.
        input_tok  = response.usage.prompt_tokens     if response.usage else 0
        output_tok = response.usage.completion_tokens if response.usage else 0
        usage = {
            "input_tokens":  input_tok,
            "output_tokens": output_tok,
            "cost_usd":      _calc_cost(self.model, input_tok, output_tok),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage."""
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        input_tokens = output_tokens = 0
        async with await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_completion_tokens=kwargs.get("max_tokens", 4096),
            stream=True,
            # Requests usage figures on the stream; they are picked up below
            # from whichever chunk carries a usage object.
            stream_options={"include_usage": True},
        ) as s:
            async for chunk in s:
                # A chunk may carry no choices at all, hence the guard.
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    yield delta
                if chunk.usage:
                    input_tokens  = chunk.usage.prompt_tokens
                    output_tokens = chunk.usage.completion_tokens
        yield {
            "input_tokens":  input_tokens,
            "output_tokens": output_tokens,
            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
        }

Base class for LLM providers.

OpenAIProvider(model: str = 'gpt-4o', api_key: Optional[str] = None)
110    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
111        import openai
112        self.client = openai.AsyncOpenAI(api_key=api_key)
113        self.model = model
client
model
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
115    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
116        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
117        response = await self.client.chat.completions.create(
118            model=self.model,
119            messages=full_messages,
120            max_completion_tokens=kwargs.get("max_tokens", 4096),
121        )
122        text = response.choices[0].message.content
123        usage = {
124            "input_tokens":  response.usage.prompt_tokens,
125            "output_tokens": response.usage.completion_tokens,
126            "cost_usd":      _calc_cost(self.model,
127                                        response.usage.prompt_tokens,
128                                        response.usage.completion_tokens),
129        }
130        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
132    async def stream(self, messages: list[dict], system: str = "", **kwargs):
133        """Yield text chunks as they arrive. Final item is a dict with usage."""
134        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
135        input_tokens = output_tokens = 0
136        async with await self.client.chat.completions.create(
137            model=self.model,
138            messages=full_messages,
139            max_completion_tokens=kwargs.get("max_tokens", 4096),
140            stream=True,
141            stream_options={"include_usage": True},
142        ) as s:
143            async for chunk in s:
144                delta = chunk.choices[0].delta.content if chunk.choices else None
145                if delta:
146                    yield delta
147                if chunk.usage:
148                    input_tokens  = chunk.usage.prompt_tokens
149                    output_tokens = chunk.usage.completion_tokens
150        yield {
151            "input_tokens":  input_tokens,
152            "output_tokens": output_tokens,
153            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
154        }

Yield text chunks as they arrive. Final item is a dict with usage.

class OllamaProvider(wactorz.agents.llm_agent.LLMProvider):
class OllamaProvider(LLMProvider):
    """Local LLM via Ollama."""

    def __init__(self, model: str = "llama3", base_url: str = "http://localhost:11434"):
        """Remember the model name and the base URL of the Ollama server."""
        self.model = model
        self.base_url = base_url

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """POST one non-streaming request to ``{base_url}/api/chat``.

        Args:
            messages: Conversation turns; a system message is prepended when
                ``system`` is non-empty.
            system: Optional system prompt.
            **kwargs: ``max_tokens``, when given, is sent as
                ``options.num_predict``.

        Returns:
            ``(text, usage)``; ``cost_usd`` is always 0.0.

        Raises:
            aiohttp.ClientResponseError: on a non-2xx HTTP response.
        """
        import aiohttp
        # /api/chat has no top-level "system" field, so the system prompt has
        # to travel as the first chat message or it is silently dropped.
        chat = ([{"role": "system", "content": system}] if system else []) + messages
        payload = {"model": self.model, "messages": chat, "stream": False}
        if "max_tokens" in kwargs:
            payload["options"] = {"num_predict": kwargs["max_tokens"]}
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                # Surface HTTP errors here instead of as a KeyError below.
                resp.raise_for_status()
                data = await resp.json()
        text = data["message"]["content"]
        prompt_eval = data.get("prompt_eval_count", 0)
        eval_count  = data.get("eval_count", 0)
        usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage."""
        import aiohttp
        import json as _json
        # Same reasoning as complete(): the system prompt goes in as a message.
        chat = ([{"role": "system", "content": system}] if system else []) + messages
        payload = {"model": self.model, "messages": chat, "stream": True}
        if "max_tokens" in kwargs:
            payload["options"] = {"num_predict": kwargs["max_tokens"]}
        input_tokens = output_tokens = 0
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                # The body is consumed line by line, one JSON document each.
                async for raw in resp.content:
                    if not raw.strip():
                        continue
                    try:
                        data = _json.loads(raw)
                    except ValueError:
                        # Best effort: skip lines that are not valid JSON.
                        continue
                    delta = (data.get("message") or {}).get("content", "")
                    if delta:
                        yield delta
                    if data.get("done"):
                        input_tokens  = data.get("prompt_eval_count", 0)
                        output_tokens = data.get("eval_count", 0)
        yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}

Local LLM via Ollama.

OllamaProvider(model: str = 'llama3', base_url: str = 'http://localhost:11434')
159    def __init__(self, model: str = "llama3", base_url: str = "http://localhost:11434"):
160        self.model = model
161        self.base_url = base_url
model
base_url
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
163    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
164        import aiohttp
165        payload = {"model": self.model, "messages": messages, "stream": False}
166        if system:
167            payload["system"] = system
168        async with aiohttp.ClientSession() as session:
169            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
170                data = await resp.json()
171        text = data["message"]["content"]
172        prompt_eval = data.get("prompt_eval_count", 0)
173        eval_count  = data.get("eval_count", 0)
174        usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
175        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
177    async def stream(self, messages: list[dict], system: str = "", **kwargs):
178        """Yield text chunks as they arrive. Final item is a dict with usage."""
179        import aiohttp, json as _json
180        payload = {"model": self.model, "messages": messages, "stream": True}
181        if system:
182            payload["system"] = system
183        input_tokens = output_tokens = 0
184        async with aiohttp.ClientSession() as session:
185            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
186                async for raw in resp.content:
187                    if not raw.strip():
188                        continue
189                    try:
190                        data = _json.loads(raw)
191                    except Exception:
192                        continue
193                    delta = (data.get("message") or {}).get("content", "")
194                    if delta:
195                        yield delta
196                    if data.get("done"):
197                        input_tokens  = data.get("prompt_eval_count", 0)
198                        output_tokens = data.get("eval_count", 0)
199        yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}

Yield text chunks as they arrive. Final item is a dict with usage.

class NIMProvider(wactorz.agents.llm_agent.LLMProvider):
class NIMProvider(LLMProvider):
    """
    Provider for NVIDIA NIM, an OpenAI-compatible API served from
    integrate.api.nvidia.com.
    Free tier: 1000 requests/month per model. No local GPU required.

    Popular free models:
      meta/llama-3.1-8b-instruct          — fast, lightweight
      meta/llama-3.3-70b-instruct         — strong general purpose
      mistralai/mistral-7b-instruct-v0.3  — fast & capable
      mistralai/mixtral-8x7b-instruct-v0.1
      google/gemma-3-27b-it
      microsoft/phi-3-mini-128k-instruct
      deepseek-ai/deepseek-r1             — reasoning model
      deepseek-ai/deepseek-r1-distill-qwen-7b
      nvidia/llama-3.1-nemotron-70b-instruct
      nvidia/llama-3.3-nemotron-super-49b-v1

    Get a free API key at: https://build.nvidia.com
    """

    NIM_BASE_URL = "https://integrate.api.nvidia.com/v1"

    def __init__(
        self,
        model:    str = "meta/llama-3.3-70b-instruct",
        api_key:  Optional[str] = None,
        base_url: str = NIM_BASE_URL,
    ):
        """Build an OpenAI-SDK client pointed at ``base_url``."""
        from openai import AsyncOpenAI
        self.model = model
        # A placeholder key is substituted when none is supplied.
        effective_key = api_key or "dummy"
        self.client = AsyncOpenAI(api_key=effective_key, base_url=base_url)

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """Run one non-streaming chat completion and return ``(text, usage)``."""
        preamble = [{"role": "system", "content": system}] if system else []
        reply = await self.client.chat.completions.create(
            model=self.model,
            messages=preamble + messages,
            max_tokens=kwargs.get("max_tokens", 4096),
        )
        text = reply.choices[0].message.content
        # Token counts fall back to 0 when the response carries no usage.
        if reply.usage:
            prompt_count = reply.usage.prompt_tokens
            completion_count = reply.usage.completion_tokens
        else:
            prompt_count = completion_count = 0
        return text, {
            "input_tokens":  prompt_count,
            "output_tokens": completion_count,
            "cost_usd":      _calc_cost(self.model, prompt_count, completion_count),
        }

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text deltas as they arrive, then one final usage dict."""
        preamble = [{"role": "system", "content": system}] if system else []
        # NOTE(review): unlike OpenAIProvider.stream, no
        # stream_options={"include_usage": True} is sent, so the counts below
        # may stay at 0 -- confirm whether NIM accepts that option.
        request = {
            "model": self.model,
            "messages": preamble + messages,
            "max_tokens": kwargs.get("max_tokens", 4096),
            "stream": True,
        }
        prompt_count = completion_count = 0
        async with await self.client.chat.completions.create(**request) as events:
            async for event in events:
                # Events without choices carry no text.
                if event.choices:
                    fragment = event.choices[0].delta.content
                    if fragment:
                        yield fragment
                if event.usage:
                    prompt_count = event.usage.prompt_tokens
                    completion_count = event.usage.completion_tokens
        yield {
            "input_tokens":  prompt_count,
            "output_tokens": completion_count,
            "cost_usd":      _calc_cost(self.model, prompt_count, completion_count),
        }

NVIDIA NIM — OpenAI-compatible API hosted at integrate.api.nvidia.com. Free tier: 1000 requests/month per model. No local GPU required.

Popular free models: meta/llama-3.1-8b-instruct — fast, lightweight meta/llama-3.3-70b-instruct — strong general purpose mistralai/mistral-7b-instruct-v0.3 — fast & capable mistralai/mixtral-8x7b-instruct-v0.1 google/gemma-3-27b-it microsoft/phi-3-mini-128k-instruct deepseek-ai/deepseek-r1 — reasoning model deepseek-ai/deepseek-r1-distill-qwen-7b nvidia/llama-3.1-nemotron-70b-instruct nvidia/llama-3.3-nemotron-super-49b-v1

Get a free API key at: https://build.nvidia.com

NIMProvider( model: str = 'meta/llama-3.3-70b-instruct', api_key: Optional[str] = None, base_url: str = 'https://integrate.api.nvidia.com/v1')
224    def __init__(
225        self,
226        model:    str = "meta/llama-3.3-70b-instruct",
227        api_key:  Optional[str] = None,
228        base_url: str = NIM_BASE_URL,
229    ):
230        import openai
231        self.model  = model
232        self.client = openai.AsyncOpenAI(
233            api_key=api_key or "dummy",   # NIM free tier may not require a key locally
234            base_url=base_url,
235        )
NIM_BASE_URL = 'https://integrate.api.nvidia.com/v1'
model
client
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
237    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
238        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
239        response = await self.client.chat.completions.create(
240            model=self.model,
241            messages=full_messages,
242            max_tokens=kwargs.get("max_tokens", 4096),
243        )
244        text = response.choices[0].message.content
245        input_tok  = response.usage.prompt_tokens     if response.usage else 0
246        output_tok = response.usage.completion_tokens if response.usage else 0
247        usage = {
248            "input_tokens":  input_tok,
249            "output_tokens": output_tok,
250            "cost_usd":      _calc_cost(self.model, input_tok, output_tok),
251        }
252        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
254    async def stream(self, messages: list[dict], system: str = "", **kwargs):
255        """Yield text chunks as they arrive. Final item is a dict with usage."""
256        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
257        input_tokens = output_tokens = 0
258        async with await self.client.chat.completions.create(
259            model=self.model,
260            messages=full_messages,
261            max_tokens=kwargs.get("max_tokens", 4096),
262            stream=True,
263        ) as s:
264            async for chunk in s:
265                delta = chunk.choices[0].delta.content if chunk.choices else None
266                if delta:
267                    yield delta
268                if chunk.usage:
269                    input_tokens  = chunk.usage.prompt_tokens
270                    output_tokens = chunk.usage.completion_tokens
271        yield {
272            "input_tokens":  input_tokens,
273            "output_tokens": output_tokens,
274            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
275        }

Yield text chunks as they arrive. Final item is a dict with usage.

class MainActor(wactorz.LLMAgent):
class MainActor(LLMAgent):
    # Class-level description and capability list for this agent.
    DESCRIPTION  = "Main orchestrator: spawns agents, routes tasks, manages the multi-agent system"
    CAPABILITIES = ["spawn_agent", "list_agents", "list_nodes", "list_topics", "orchestration"]

    # System prompt for _classify_intent: instructs the model to answer with
    # exactly one of the labels HA, PIPELINE or OTHER.
    INTENT_CLASSIFIER_PROMPT = (
        "You are a routing classifier for a smart home AI assistant.\n"
        "Respond with exactly one token: HA, PIPELINE, or OTHER.\n\n"
        "HA = a direct, one-shot Home Assistant action or query:\n"
        "  - Turn on/off a device right now\n"
        "  - List devices, areas, entities, automations\n"
        "  - Create/edit/delete a HA automation\n"
        "  - Set temperature, dim lights, lock door — immediate action\n\n"
        "PIPELINE = a reactive rule that should run continuously:\n"
        "  - 'if X happens then do Y' — any conditional/reactive logic\n"
        "  - 'when X send me a message/notification'\n"
        "  - 'whenever X turns on/off do Y'\n"
        "  - Any rule involving a sensor state change triggering an action or notification\n"
        "  - Any webcam/camera detection triggering anything\n"
        "  - Anything involving Discord/Telegram notifications triggered by an event\n\n"
        "OTHER = general conversation, coding, questions, anything not HA or pipeline related."
    )
 665
 666    def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs):
 667        kwargs.setdefault("name", "main")
 668        kwargs.setdefault("system_prompt", ORCHESTRATOR_PROMPT)
 669        super().__init__(llm_provider=llm_provider, **kwargs)
 670        self._result_futures: dict[str, asyncio.Future] = {}
 671        # Queued monitor notifications — prepended to next user response
 672        self._pending_notifications: list[dict] = []
 673        self.protected = True
 674        # Remote node tracking: node_name → {"last_seen": float, "agents": [...]}
 675        self._known_nodes: dict[str, dict] = {}
 676        # Topic registry: topic → [manifest, ...] — built from agents/+/manifest
 677        self._topic_registry: dict[str, list] = {}  # topic → list of agent manifests
 678        self._agent_manifests: dict[str, dict] = {}  # agent name → latest manifest (includes schemas)
 679
 680    # ── Lifecycle ──────────────────────────────────────────────────────────
 681
 682    async def on_start(self):
 683        await super().on_start()
 684        await self._restore_spawned_agents()
 685        # Listen for remote node heartbeats so we know what's online
 686        self._tasks.append(asyncio.create_task(self._node_heartbeat_listener()))
 687        # Listen for agent capability manifests to build topic registry
 688        self._tasks.append(asyncio.create_task(self._manifest_listener()))
 689        # Inject persisted user facts into system prompt
 690        self._inject_user_facts_into_prompt()
 691
 692    # ── Spawn registry ─────────────────────────────────────────────────────
 693
 694    def _get_spawn_registry(self) -> dict:
 695        return self.recall(SPAWN_REGISTRY_KEY) or {}
 696
 697    def _save_to_spawn_registry(self, config: dict):
 698        reg = self._get_spawn_registry()
 699        reg[config["name"]] = config
 700        self.persist(SPAWN_REGISTRY_KEY, reg)
 701        logger.info(f"[{self.name}] Spawn registry: {list(reg.keys())}")
 702
 703    def _remove_from_spawn_registry(self, name: str):
 704        reg = self._get_spawn_registry()
 705        if name in reg:
 706            del reg[name]
 707            self.persist(SPAWN_REGISTRY_KEY, reg)
 708            logger.info(f"[{self.name}] Removed '{name}' from spawn registry.")
 709
 710    # ── Pipeline rules registry ────────────────────────────────────────────
 711    # Stores grouped rules: one entry per user request, listing all agents spawned for it.
 712    # Schema: { rule_id: { "rule_id", "task", "agents": [str], "created_at": float } }
 713
 714    def get_pipeline_rules(self) -> dict:
 715        return self.recall(PIPELINE_RULES_KEY) or {}
 716
 717    def save_pipeline_rule(self, rule: dict):
 718        rules = self.get_pipeline_rules()
 719        rules[rule["rule_id"]] = rule
 720        self.persist(PIPELINE_RULES_KEY, rules)
 721        logger.info(f"[{self.name}] Pipeline rule saved: {rule['rule_id']} agents={rule.get('agents', [])}")
 722
 723    def get_notification_urls(self) -> dict:
 724        """Return persisted notification webhook URLs (discord, telegram, slack, etc.)"""
 725        return self.recall("_notification_urls") or {}
 726
 727    # ── User facts ─────────────────────────────────────────────────────────
 728    # Key facts extracted from conversation: HA URL, entity names, preferences,
 729    # user name, webhook URLs, etc. Stored separately from history so they
 730    # survive summarization and persist indefinitely.
 731
    # System prompt for the fact-extraction call in _extract_and_save_facts.
    # It asks the model for a bare JSON object ({} when nothing is worth
    # keeping), which that method then parses and merges into stored facts.
    _FACTS_EXTRACT_PROMPT = (
        "Extract durable facts from this conversation exchange that would be useful to remember "
        "long-term. Focus on: names, locations, device entity IDs, URLs, credentials, preferences, "
        "configurations, and any explicit statements about the user's setup.\n"
        "Return a JSON object with short descriptive keys and concise values. "
        "Return {} if nothing worth remembering was said.\n"
        "Example: {\"ha_url\": \"http://192.168.1.10:8123\", \"user_name\": \"Alex\", "
        "\"living_room_light\": \"light.wiz_rgbw_tunable_02cba0\"}\n"
        "Output only valid JSON. No explanation, no markdown."
    )
 742
 743    def get_user_facts(self) -> dict:
 744        return self.recall("_user_facts") or {}
 745
 746    def _inject_user_facts_into_prompt(self):
 747        """Prepend known user facts to the system prompt so the LLM always has them."""
 748        facts = self.get_user_facts()
 749        if not facts:
 750            return
 751        facts_lines = "\n".join(f"  {k}: {v}" for k, v in facts.items())
 752        facts_block = f"\n\n== KNOWN USER FACTS (always keep in mind) ==\n{facts_lines}"
 753        # Avoid duplicating if already injected
 754        marker = "== KNOWN USER FACTS"
 755        base_prompt = ORCHESTRATOR_PROMPT
 756        if marker in self.system_prompt:
 757            # Replace existing facts block
 758            self.system_prompt = base_prompt + facts_block
 759        else:
 760            self.system_prompt = self.system_prompt + facts_block
 761
 762    async def _extract_and_save_facts(self, user_message: str, assistant_response: str):
 763        """After each exchange, ask the LLM to extract any new durable facts."""
 764        if self.llm is None:
 765            return
 766        exchange = f"USER: {user_message[:600]}\nASSISTANT: {assistant_response[:600]}"
 767        try:
 768            raw, _ = await self.llm.complete(
 769                messages=[{"role": "user", "content": exchange}],
 770                system=self._FACTS_EXTRACT_PROMPT,
 771                max_tokens=200,
 772            )
 773            import json as _json, re as _re
 774            clean = raw.strip().lstrip("```json").lstrip("```").rstrip("```").strip()
 775            new_facts = _json.loads(clean)
 776            if not isinstance(new_facts, dict) or not new_facts:
 777                return
 778            # Merge with existing facts
 779            facts = self.get_user_facts()
 780            facts.update(new_facts)
 781            self.persist("_user_facts", facts)
 782            self._inject_user_facts_into_prompt()
 783            logger.info(f"[{self.name}] User facts updated: {list(new_facts.keys())}")
 784        except Exception as e:
 785            logger.debug(f"[{self.name}] Facts extraction skipped: {e}")
 786
 787    async def delete_pipeline_rule(self, rule_id: str) -> str:
 788        """Stop all agents for a rule and remove it from registry."""
 789        rules = self.get_pipeline_rules()
 790        rule = rules.get(rule_id)
 791        if not rule:
 792            return f"No rule found with id '{rule_id}'."
 793        agents = rule.get("agents", [])
 794        stopped = []
 795        for agent_name in agents:
 796            self._remove_from_spawn_registry(agent_name)
 797            if self._registry:
 798                actor = self._registry.find_by_name(agent_name)
 799                if actor:
 800                    await actor.stop()
 801                    await self._registry.unregister(actor.actor_id)
 802                    stopped.append(agent_name)
 803        del rules[rule_id]
 804        self.persist(PIPELINE_RULES_KEY, rules)
 805        task_preview = rule.get("task", "")[:60]
 806        return f"Rule '{rule_id}' deleted. Stopped agents: {', '.join(stopped) or 'none running'}.\nRule was: {task_preview}"
 807
 808    async def _restore_spawned_agents(self):
 809        reg = self._get_spawn_registry()
 810        if not reg:
 811            return
 812        logger.info(f"[{self.name}] Restoring {len(reg)} agent(s): {list(reg.keys())}")
 813        for name, config in reg.items():
 814            node = config.get("node", "").strip()
 815            if node:
 816                # Remote agent — re-publish spawn to its node; no local object expected
 817                logger.info(f"[{self.name}] Re-spawning remote agent '{name}' on node '{node}'")
 818                try:
 819                    await self._spawn_remote(config, node, save=False)
 820                except Exception as e:
 821                    logger.error(f"[{self.name}] Failed to restore remote '{name}' on '{node}': {e}")
 822                continue
 823            if self._registry and self._registry.find_by_name(name):
 824                logger.info(f"[{self.name}] '{name}' already running, skipping.")
 825                continue
 826            try:
 827                await self._spawn_from_config(config, save=False)
 828                logger.info(f"[{self.name}] Restored: {name}")
 829            except Exception as e:
 830                logger.error(f"[{self.name}] Failed to restore '{name}': {e}")
 831
 832    # ── Message handling ───────────────────────────────────────────────────
 833
 834    async def handle_message(self, msg: Message):
 835        if msg.type == MessageType.TASK:
 836            # Intercept monitor notifications BEFORE passing to LLM _handle_task
 837            if isinstance(msg.payload, dict) and msg.payload.get("_monitor_notification"):
 838                self._pending_notifications.append(msg.payload)
 839                logger.info(f"[{self.name}] Monitor alert queued: {msg.payload.get('message','')[:80]}")
 840                return
 841            await self._handle_task(msg)
 842
 843        elif msg.type == MessageType.RESULT:
 844            if isinstance(msg.payload, dict):
 845                # Support both key names: "_task_id" (new) and "task" (legacy)
 846                fid = msg.payload.get("_task_id") or msg.payload.get("task")
 847                if fid and fid in self._result_futures:
 848                    fut = self._result_futures[fid]
 849                    if not fut.done():
 850                        fut.set_result(msg.payload)
 851
 852    # ── Home Automation intent detection ───────────────────────────────────
 853
 854    @staticmethod
 855    def _looks_like_home_automation_request(text: str) -> bool:
 856        lowered = (text or "").lower()
 857        if "home assistant" in lowered:
 858            return True
 859        if lowered.startswith("spawn ") or lowered.startswith("/"):
 860            return False
 861
 862        # Wactorz pipeline requests — these involve external sensors/agents, not HA natively
 863        # Route to planner instead of HA agent
 864        _pipeline_keywords = [
 865            "camera", "webcam", "yolo", "detect", "detection", "person detect",
 866            "object detect", "laptop camera", "cv2", "opencv",
 867            "when detected", "if detected", "whenever detected",
 868            "notify me", "send me a message", "send me a discord",
 869            "discord", "telegram", "whatsapp",
 870        ]
 871        if any(kw in lowered for kw in _pipeline_keywords):
 872            return False
 873
 874        has_trigger = any(token in lowered for token in [
 875            "when ", "if ", "on ", "whenever ", "after ", "before ",
 876            "as soon as ", "at ",
 877        ])
 878        has_action = any(token in lowered for token in [
 879            "turn on", "turn off", "open", "close", "lock", "unlock", "dim", "set",
 880        ])
 881        has_automation_intent = any(token in lowered for token in [
 882            "automate", "automation", "routine", "scene", "trigger", "schedule",
 883            "presence", "motion", "door", "window", "sensor", "alarm",
 884            "romantic", "cozy", "ambience", "ambiance",
 885        ])
 886        has_home_context = any(token in lowered for token in [
 887            "home", "house", "apartment", "room", "living room", "bedroom",
 888            "kitchen", "hallway", "garage", "porch",
 889        ])
 890
 891        return (
 892            (has_trigger and has_action)
 893            or (has_trigger and has_automation_intent)
 894            or (has_automation_intent and has_home_context)
 895        )
 896
 897    async def _classify_intent(self, text: str) -> str:
 898        """
 899        Classify user intent as HA, PIPELINE, or OTHER using a single cheap LLM call.
 900        Returns one of: 'HA', 'PIPELINE', 'OTHER'
 901        """
 902        if not text or text.startswith("/"):
 903            return "OTHER"
 904        if self.llm is None:
 905            return "OTHER"
 906        try:
 907            decision, _ = await asyncio.wait_for(
 908                self.llm.complete(
 909                    messages=[{"role": "user", "content": text}],
 910                    system=self.INTENT_CLASSIFIER_PROMPT,
 911                    max_tokens=4,
 912                ),
 913                timeout=5.0,
 914            )
 915            token = (decision or "").strip().upper().split()[0] if decision else "OTHER"
 916            if token in ("HA", "PIPELINE", "OTHER"):
 917                return token
 918            return "OTHER"
 919        except Exception as e:
 920            logger.debug(f"[{self.name}] Intent classification failed: {e}")
 921            return "OTHER"
 922
 923    async def _is_home_automation_request(self, text: str) -> bool:
 924        # Keep for backward compat — delegates to _classify_intent
 925        intent = await self._classify_intent(text)
 926        return intent == "HA"
 927
 928    # ── User input ─────────────────────────────────────────────────────────
 929
 930    async def chat(self, user_message: str) -> str:
 931        response = await super().chat(user_message)
 932        # Fire-and-forget fact extraction — don't block the response
 933        asyncio.create_task(self._extract_and_save_facts(user_message, response))
 934        return response
 935
 936    async def chat_stream(self, user_message: str):
 937        full_response = []
 938        async for chunk in super().chat_stream(user_message):
 939            if isinstance(chunk, dict):
 940                yield chunk
 941            else:
 942                full_response.append(chunk)
 943                yield chunk
 944        # Extract facts from completed response
 945        if full_response:
 946            asyncio.create_task(
 947                self._extract_and_save_facts(user_message, "".join(full_response))
 948            )
 949
 950
 951    def _drain_notifications(self) -> str:
 952        """Pop queued monitor notifications as a formatted prefix string."""
 953        if not self._pending_notifications:
 954            return ""
 955        icons = {"critical": "\U0001f534", "warning": "\U0001f7e1", "info": "\u2705"}
 956        lines = []
 957        for n in self._pending_notifications:
 958            icon = icons.get(n.get("severity", "warning"), "\u26a0\ufe0f")
 959            lines.append(f"{icon} **System:** {n.get('message', '').strip()}")
 960        self._pending_notifications.clear()
 961        return "\n".join(lines) + "\n\n---\n\n"
 962
 963    async def process_user_input(self, text: str) -> str:
 964        note_prefix = self._drain_notifications()
 965
 966        # ── Direct API intercepts — handle without LLM round-trip ──────────
 967        stripped = text.strip().rstrip("()")
 968        if stripped in ("main.list_nodes", "list_nodes", "/nodes"):
 969            nodes = self.list_nodes()
 970            if not nodes:
 971                return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>."
 972            import time as _t
 973            lines = []
 974            for nd in sorted(nodes, key=lambda x: x["node"]):
 975                status   = "🟢 online" if nd["online"] else "🔴 offline"
 976                agents   = ", ".join(nd["agents"]) or "(no agents)"
 977                age      = int(_t.time() - nd["last_seen"])
 978                lines.append(f"  {nd['node']:22s} {status}  |  agents: {agents}  |  last heartbeat: {age}s ago")
 979            return note_prefix + "Remote nodes:\n" + "\n".join(lines)
 980
 981        if stripped.startswith("/topics"):
 982            keyword = stripped[7:].strip().lstrip("(").rstrip(")")
 983            topics = self.list_topics(keyword)
 984            if not topics:
 985                msg = f"No topics found" + (f" matching '{keyword}'" if keyword else "") + "."
 986                msg += " Topics are registered automatically when agents publish for the first time."
 987                return note_prefix + msg
 988            lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"]
 989            for t in topics:
 990                agent_strs = ", ".join(
 991                    f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "")
 992                    for a in t["agents"]
 993                )
 994                lines.append(f"  {t['topic']:40s}{agent_strs}")
 995            return note_prefix + "\n".join(lines)
 996
 997        # ── Webhook / notification URL management ───────────────────────────
 998        if stripped.startswith("/memory"):
 999            parts = stripped.split(None, 1)
1000            sub = parts[1].strip() if len(parts) > 1 else ""
1001            if sub == "clear":
1002                self.persist("_user_facts", {})
1003                self.persist("history_summary", "")
1004                self._history_summary = ""
1005                self.system_prompt = ORCHESTRATOR_PROMPT
1006                return note_prefix + "Memory cleared — user facts and conversation summary reset."
1007            if sub.startswith("forget "):
1008                key = sub[7:].strip()
1009                facts = self.get_user_facts()
1010                if key in facts:
1011                    del facts[key]
1012                    self.persist("_user_facts", facts)
1013                    self._inject_user_facts_into_prompt()
1014                    return note_prefix + f"Forgotten: '{key}'"
1015                return note_prefix + f"No fact found with key '{key}'."
1016            # Default: show memory
1017            facts = self.get_user_facts()
1018            summary = self._history_summary
1019            lines = []
1020            if facts:
1021                lines.append(f"User facts ({len(facts)}):")
1022                for k, v in facts.items():
1023                    lines.append(f"  {k}: {v}")
1024            else:
1025                lines.append("No user facts stored yet.")
1026            if summary:
1027                lines.append(f"\nConversation summary:\n  {summary[:300]}{'...' if len(summary) > 300 else ''}")
1028            else:
1029                lines.append("\nNo conversation summary yet.")
1030            lines.append("\nCommands: /memory clear | /memory forget <key>")
1031            return note_prefix + "\n".join(lines)
1032
1033        if stripped.startswith("/webhook"):
1034            parts = stripped.split(None, 2)
1035            if len(parts) == 1:
1036                # /webhook — show stored URLs
1037                urls = self.recall("_notification_urls") or {}
1038                if not urls:
1039                    return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url>  or  /webhook telegram <url>"
1040                lines = ["Stored notification URLs:"]
1041                for svc, url in urls.items():
1042                    lines.append(f"  {svc}: {url}")
1043                return note_prefix + "\n".join(lines)
1044            elif len(parts) >= 3:
1045                # /webhook discord <url>
1046                service = parts[1].lower()
1047                url = parts[2].strip()
1048                urls = self.recall("_notification_urls") or {}
1049                urls[service] = url
1050                self.persist("_notification_urls", urls)
1051                return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically."
1052            else:
1053                return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..."
1054
1055        # Auto-detect webhook URLs in any message and persist them
1056        import re as _re
1057        _webhook_match = _re.search(
1058            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
1059            text
1060        )
1061        if _webhook_match:
1062            url = _webhook_match.group(0).rstrip(".,;!)'\"")
1063            urls = self.recall("_notification_urls") or {}
1064            if "discord" in url:
1065                urls["discord"] = url
1066            elif "slack" in url:
1067                urls["slack"] = url
1068            elif "telegram" in url:
1069                urls["telegram"] = url
1070            self.persist("_notification_urls", urls)
1071            logger.info(f"[{self.name}] Auto-saved webhook URL from message")
1072
1073        if stripped in ("/rules", "rules"):
1074            rules = self.get_pipeline_rules()
1075            if not rules:
1076                return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'."
1077            lines = [f"Active pipeline rules ({len(rules)}):"]
1078            for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)):
1079                agents = rule.get("agents", [])
1080                task = rule.get("task", "")[:80]
1081                import datetime
1082                ts = rule.get("created_at", 0)
1083                created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown"
1084                # Check which agents are running
1085                running_agents = []
1086                stopped_agents = []
1087                for a in agents:
1088                    if self._registry and self._registry.find_by_name(a):
1089                        running_agents.append(a)
1090                    else:
1091                        stopped_agents.append(a)
1092                status = "🟢" if running_agents else "🔴"
1093                lines.append(f"\n{status} [{rule_id}] — {task}")
1094                lines.append(f"   agents  : {', '.join(agents)}")
1095                if stopped_agents:
1096                    lines.append(f"   stopped : {', '.join(stopped_agents)}")
1097                lines.append(f"   created : {created}")
1098            lines.append("\nTo delete a rule: /rules delete <rule_id>")
1099            return note_prefix + "\n".join(lines)
1100
1101        if stripped.startswith("/rules delete "):
1102            rule_id = stripped[len("/rules delete "):].strip()
1103            result = await self.delete_pipeline_rule(rule_id)
1104            return note_prefix + result
1105
1106        if stripped.startswith("/rules"):
1107            keyword = stripped[14:].strip().lstrip("(").rstrip(")")
1108            caps = self.list_capabilities(keyword)
1109            if not caps:
1110                msg = "No agents found" + (f" matching '{keyword}'" if keyword else "") + "."
1111                msg += " Agents publish their capabilities on startup."
1112                return note_prefix + msg
1113            lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"]
1114            for a in caps:
1115                lines.append("")
1116                lines.append("  [" + a["name"] + "]" + (" on " + a["node"] if a.get("node") else ""))
1117                lines.append("    description : " + a["description"])
1118                if a["capabilities"]:
1119                    lines.append("    capabilities: " + ", ".join(a["capabilities"]))
1120                if a["input_schema"]:
1121                    lines.append("    input       : " + str(a["input_schema"]))
1122                if a["output_schema"]:
1123                    lines.append("    output      : " + str(a["output_schema"]))
1124            return note_prefix + "\n".join(lines)
1125
1126                # ── @mention direct routing ─────────────────────────────────────────
1127        if text.startswith("@"):
1128            # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?"
1129            parts       = text.split(None, 1)
1130            target_name = parts[0].lstrip("@").rstrip(":,")
1131            message     = parts[1].strip() if len(parts) > 1 else text
1132
1133            # Try local registry first
1134            local_target = self._registry.find_by_name(target_name) if self._registry else None
1135            if not local_target:
1136                # Not running — check if it's a spawnable catalog recipe
1137                manifest = self._agent_manifests.get(target_name, {})
1138                if manifest.get("spawnable") and manifest.get("catalog"):
1139                    catalog_name  = manifest["catalog"]
1140                    catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None
1141                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1142                        logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...")
1143                        try:
1144                            spawn_result = await catalog_actor._action_spawn(target_name, {})
1145                            if spawn_result and spawn_result.get("ok"):
1146                                await asyncio.sleep(0.5)
1147                                local_target = self._registry.find_by_name(target_name) if self._registry else None
1148                                logger.info(f"[main] '{target_name}' spawned, routing task...")
1149                            else:
1150                                err = spawn_result.get("message", "unknown error") if spawn_result else "no response"
1151                                return note_prefix + f"Could not spawn '{target_name}': {err}"
1152                        except Exception as e:
1153                            return note_prefix + f"Could not spawn '{target_name}': {e}"
1154
1155            if local_target:
1156                result = await self.delegate_task(target_name, message, timeout=60.0)
1157                if result:
1158                    reply = result.get("result") or result.get("response") or str(result)
1159                    return note_prefix + f"**{target_name}**: {reply}"
1160                return note_prefix + f"{target_name} did not respond."
1161
1162            # Check if it's a known remote agent
1163            remote_node = None
1164            for node_name, nd in self._known_nodes.items():
1165                if target_name in nd.get("agents", []):
1166                    remote_node = node_name
1167                    break
1168
1169            if remote_node:
1170                # Send via MQTT and wait for reply
1171                import time as _t
1172                reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}"
1173                future: asyncio.Future = asyncio.get_event_loop().create_future()
1174                self._result_futures[reply_topic] = future
1175
1176                await self._mqtt_publish(
1177                    f"agents/by-name/{target_name}/task",
1178                    {"text": message, "_reply_topic": reply_topic,
1179                     "_remote_task": True, "payload": message},
1180                )
1181
1182                # Subscribe briefly for the reply
1183                async def _wait_reply():
1184                    try:
1185                        import aiomqtt
1186                        async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
1187                            await client.subscribe(reply_topic)
1188                            async for msg in client.messages:
1189                                try:
1190                                    data = json.loads(msg.payload.decode())
1191                                    if not future.done():
1192                                        future.set_result(data)
1193                                except Exception:
1194                                    pass
1195                                return
1196                    except Exception as e:
1197                        if not future.done():
1198                            future.set_exception(e)
1199
1200                reply_task = asyncio.create_task(_wait_reply())
1201                try:
1202                    result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0)
1203                    reply_task.cancel()
1204                    reply = result.get("result") or result.get("response") or str(result)
1205                    return note_prefix + f"**{target_name}** (on {remote_node}): {reply}"
1206                except asyncio.TimeoutError:
1207                    reply_task.cancel()
1208                    return note_prefix + f"{target_name} on {remote_node} did not respond within 30s."
1209                finally:
1210                    self._result_futures.pop(reply_topic, None)
1211
1212            # Not found locally or remotely
1213            known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])]
1214            if known_remote:
1215                return note_prefix + (f"Agent '{target_name}' not found. "
1216                    f"Remote agents: {', '.join(known_remote)}")
1217            return note_prefix + f"Agent '{target_name}' not found."
1218
1219        # Explicit planner prefix always wins
1220        lowered = text.lower()
1221        if any(lowered.startswith(p) for p in (
1222            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1223            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1224        )):
1225            result = await self._run_planner(text)
1226            return note_prefix + (result or "Planner did not return a result. Please retry.")
1227
1228        # Single LLM call classifies intent: HA (direct action), PIPELINE (reactive rule), OTHER
1229        intent = await self._classify_intent(text)
1230        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1231
1232        if intent == "PIPELINE":
1233            result = await self._run_planner(text)
1234            return note_prefix + (result or "Planner did not return a result. Please retry.")
1235
1236        if intent == "HA":
1237            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1238            if result and isinstance(result, dict) and result.get("result"):
1239                return note_prefix + str(result["result"])
1240            if not result:
1241                return note_prefix + "I could not reach the Home Assistant agent right now. Please retry."
1242            return note_prefix + "The Home Assistant agent did not return a result. Please retry."
1243
1244        response = await self.chat(text)
1245
1246        # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once
1247        has_spawn   = "<spawn>" in response
1248        has_code    = "async def handle_task" in response or "async def setup" in response
1249        asked_spawn = any(w in text.lower() for w in ("spawn", "create", "make", "build", "add", "agent"))
1250        if has_code and not has_spawn and asked_spawn:
1251            logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it")
1252            response = await self.chat(
1253                "You wrote agent code but forgot to wrap it in a <spawn> block. "
1254                "Please output the complete spawn block now with that exact code inside it. "
1255                "Output ONLY the <spawn>...</spawn> block, nothing else."
1256            )
1257
1258        clean, spawned = await self._process_spawn_commands(response)
1259
1260        # Execute any @agent-name {payload} delegation patterns the LLM produced
1261        clean = await self._execute_llm_delegations(clean)
1262
1263        await self._mqtt_publish(
1264            f"agents/{self.actor_id}/logs",
1265            {"type": "user_interaction", "input": text[:100], "response": clean[:200]},
1266        )
1267
1268        if spawned:
1269            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1270            live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)]
1271            parts = []
1272            if live_names:
1273                replaced = '"replace": true' in response or '"replace":true' in response
1274                action   = "Replaced" if replaced else "Spawned"
1275                parts.append(f"{action} {', '.join(live_names)}")
1276            if bg_names:
1277                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1278            if parts:
1279                clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]"
1280
1281        return note_prefix + clean
1282
1283    async def process_user_input_stream(self, text: str):
1284        """
1285        Streaming version of process_user_input().
1286        Yields text chunks as the LLM generates them, then a final dict:
1287          {"done": True, "spawned": [...names...], "system_msg": "..."}
1288
1289        The CLI calls this and prints chunks immediately.
1290        REST/Discord/WhatsApp should use process_user_input() instead.
1291        """
1292        # Drain monitor notifications first
1293        note_prefix = self._drain_notifications()
1294        if note_prefix:
1295            yield note_prefix
1296
1297        # All slash-commands and direct API intercepts are handled by process_user_input
1298        # Route them there to avoid duplicating all that logic here
1299        _stripped = text.strip().rstrip("()")
1300        _is_command = (
1301            _stripped.startswith("/")
1302            or _stripped in ("list_nodes", "main.list_nodes", "rules")
1303            or _stripped.startswith("@")
1304        )
1305        if _is_command:
1306            result = await self.process_user_input(text)
1307            yield result
1308            yield {"done": True, "spawned": [], "system_msg": ""}
1309            return
1310
1311        # Explicit planner prefix always wins
1312        _lowered = text.lower()
1313        if any(_lowered.startswith(p) for p in (
1314            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1315            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1316        )):
1317            result = await self._run_planner(text)
1318            yield result or "Planner did not return a result. Please retry."
1319            yield {"done": True, "spawned": [], "system_msg": ""}
1320            return
1321
1322        # Single LLM call classifies intent: HA, PIPELINE, or OTHER
1323        intent = await self._classify_intent(text)
1324        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1325
1326        if intent == "PIPELINE":
1327            result = await self._run_planner(text)
1328            yield result or "Planner did not return a result. Please retry."
1329            yield {"done": True, "spawned": [], "system_msg": ""}
1330            return
1331
1332        if intent == "HA":
1333            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1334            if result and isinstance(result, dict) and result.get("result"):
1335                yield str(result["result"])
1336            elif not result:
1337                yield "I could not reach the Home Assistant agent right now. Please retry."
1338            else:
1339                yield "The Home Assistant agent did not return a result. Please retry."
1340            yield {"done": True, "spawned": [], "system_msg": ""}
1341            return
1342
1343        # Stream the LLM response chunk by chunk
1344        full_chunks = []
1345        async for chunk in self.chat_stream(text):
1346            if isinstance(chunk, dict):
1347                break   # usage dict — discard, already tracked inside chat_stream
1348            full_chunks.append(chunk)
1349            yield chunk
1350
1351        full_response = "".join(full_chunks)
1352
1353        # Process any <spawn> blocks in the completed response
1354        _, spawned = await self._process_spawn_commands(full_response)
1355
1356        # Execute any @agent-name {payload} delegation patterns the LLM produced
1357        # If delegations ran, yield the results as an additional chunk
1358        delegated = await self._execute_llm_delegations(full_response)
1359        if delegated != full_response:
1360            # Find what changed and yield just the new parts
1361            import re as _re
1362            results = _re.findall(r'[✅❌]\s+\S+.*', delegated)
1363            if results:
1364                yield "\n" + "\n".join(results)
1365        full_response = delegated
1366
1367        system_msg = ""
1368        if spawned:
1369            names      = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder))
1370            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1371            parts = []
1372            if names:
1373                replaced = '"replace": true' in full_response or '"replace":true' in full_response
1374                parts.append(f"{'Replaced' if replaced else 'Spawned'} {names} — will auto-restore on restart")
1375            if bg_names:
1376                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1377            system_msg = " | ".join(parts)
1378
1379        await self._mqtt_publish(
1380            f"agents/{self.actor_id}/logs",
1381            {"type": "user_interaction", "input": text[:100], "response": full_response[:200]},
1382        )
1383
1384        yield {"done": True, "spawned": spawned, "system_msg": system_msg}
1385
1386    # ── Planner ────────────────────────────────────────────────────────────
1387
    # Signals that a request may need multi-agent planning.
    # Each entry is applied as a regular expression (re.search) against the
    # lower-cased user text by _needs_planning, which escalates only when at
    # least two entries match. Plain phrases therefore behave as substring
    # matches, and ".*" entries match the two words in that order anywhere in
    # the text.
    _PLANNING_KEYWORDS = [
        # Coordination signals
        "and then", "after that", "also", "combine", "compare",
        "coordinate", "plan", "pipeline", "orchestrate", "summarize both",
        "using multiple", "all agents", "several agents",
        # Multi-step / multi-domain signals
        "first.*then", "step by step", "in order",
        "weather.*news", "news.*weather", "manual.*code", "search.*analyze",
        # Reactive pipeline signals
        "if.*then", "when.*send", "when.*turn", "when.*open", "when.*close",
        "whenever", "monitor.*and", "watch.*and", "detect.*and",
        "notify me", "alert me", "automatically",
    ]
1401
1402    async def _needs_planning(self, text: str) -> bool:
1403        """
1404        Heuristic: does this task benefit from multi-agent coordination?
1405        Keeps main fast — only escalates genuinely complex requests.
1406        """
1407        import re
1408        lowered = text.lower()
1409
1410        # Explicit user request for coordination
1411        if any(w in lowered for w in (
1412            "coordinate:", "plan:", "pipeline:", "@planner",
1413            "ask the planner", "use the planner", "create a pipeline",
1414            "set up a pipeline", "create a rule", "set up a rule",
1415        )):
1416            return True
1417
1418        # Keyword heuristic — multiple signals needed to avoid false positives
1419        hits = sum(1 for kw in self._PLANNING_KEYWORDS if re.search(kw, lowered))
1420        if hits >= 2:
1421            return True
1422
1423        # References two or more known agent names
1424        if self._registry:
1425            agent_names = [a.name for a in self._registry.all_actors()
1426                           if a.name not in {"main", "monitor", "installer"}]
1427            mentioned = sum(1 for name in agent_names if name in lowered)
1428            if mentioned >= 2:
1429                return True
1430
1431        return False
1432
1433    async def _run_planner(self, task: str) -> Optional[str]:
1434        """Spawn a PlannerAgent, hand it the task, wait for the result."""
1435        from .planner_agent import PlannerAgent
1436        import uuid
1437
1438        # Enrich vague follow-up tasks with recent conversation context
1439        # so the planner has the full picture (e.g. which entity was found)
1440        enriched_task = task
1441        if self._conversation_history and len(task.split()) < 15:
1442            # Short/vague task — inject last 3 exchanges as context
1443            recent = self._conversation_history[-6:]  # 3 user+assistant pairs
1444            ctx_lines = []
1445            for m in recent:
1446                role    = "User" if m["role"] == "user" else "Assistant"
1447                content = str(m["content"])[:300]
1448                ctx_lines.append(f"{role}: {content}")
1449            if ctx_lines:
1450                enriched_task = (
1451                    f"{task}\n\n"
1452                    f"[Context from recent conversation:]\n"
1453                    + "\n".join(ctx_lines)
1454                )
1455
1456        planner_name = f"planner-{uuid.uuid4().hex[:6]}"
1457        logger.info(f"[{self.name}] Spawning planner '{planner_name}' for: {enriched_task[:60]}")
1458
1459        await self._mqtt_publish(
1460            f"agents/{self.actor_id}/logs",
1461            {"type": "log", "message": f"Complex task detected — spawning planner...", "timestamp": __import__('time').time()},
1462        )
1463
1464        task_id = f"plan_{uuid.uuid4().hex[:8]}"
1465        future: asyncio.Future = asyncio.get_running_loop().create_future()
1466        self._result_futures[task_id] = future
1467
1468        try:
1469            planner = await self.spawn(
1470                PlannerAgent,
1471                name=planner_name,
1472                llm_provider=self.llm,
1473                task=enriched_task,
1474                reply_to_id=self.actor_id,
1475                reply_task_id=task_id,
1476                auto_terminate=True,
1477                persistence_dir=str(self._persistence_dir.parent),
1478            )
1479            if not planner:
1480                return None
1481
1482            result_payload = await asyncio.wait_for(future, timeout=180.0)
1483            answer = result_payload.get("result") or result_payload.get("text") or ""
1484            spawned_names = result_payload.get("spawned", [])
1485            if spawned_names:
1486                answer += f"\n\n[System: Planner created new agents: {', '.join(spawned_names)} — saved for future use]"
1487            return answer
1488
1489        except asyncio.TimeoutError:
1490            logger.warning(f"[{self.name}] Planner timed out for: {task[:60]}")
1491            return "The pipeline is taking longer than expected to set up. Check `/rules` in a moment to see if agents were spawned, or try again."
1492        except Exception as e:
1493            logger.error(f"[{self.name}] Planner error: {e}")
1494            return None
1495        finally:
1496            self._result_futures.pop(task_id, None)
1497
1498        # ── Spawn ──────────────────────────────────────────────────────────────
1499
1500    async def _execute_llm_delegations(self, response: str) -> str:
1501        """
1502        Scan the LLM response for @agent-name {json} delegation patterns and execute them.
1503        Replaces the pattern in the response with the actual result.
1504
1505        Matches lines like:
1506            @doc-to-pptx-agent {"file_path": "...", "output_path": "..."}
1507            @weather-agent {"city": "Athens"}
1508        """
1509        import re
1510
1511        # Find @agent-name then scan for the matching { } block manually
1512        # (regex alone can't handle } inside string values reliably)
1513        delegations = []   # list of (full_match_str, agent_name, payload_dict)
1514
1515        for m in re.finditer(r'@([\w][\w\-]*)\s+(\{)', response):
1516            agent_name = m.group(1)
1517            if agent_name == self.name:
1518                continue
1519            start = m.start(2)   # position of opening {
1520            depth = 0
1521            end   = start
1522            for i, ch in enumerate(response[start:], start):
1523                if ch == '{':
1524                    depth += 1
1525                elif ch == '}':
1526                    depth -= 1
1527                    if depth == 0:
1528                        end = i + 1
1529                        break
1530            if depth != 0:
1531                continue   # unmatched braces — skip
1532            json_str = response[start:end]
1533            try:
1534                payload = json.loads(json_str)
1535            except json.JSONDecodeError:
1536                continue
1537            delegations.append((response[m.start():end], agent_name, payload))
1538
1539        replacements = []
1540        for full_match, agent_name, payload in delegations:
1541            # Check if agent is running, if not auto-spawn via catalog
1542            target = self._registry.find_by_name(agent_name) if self._registry else None
1543            if not target:
1544                manifest = self._agent_manifests.get(agent_name, {})
1545                if manifest.get("spawnable") and manifest.get("catalog"):
1546                    catalog_actor = self._registry.find_by_name(manifest["catalog"]) if self._registry else None
1547                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1548                        logger.info(f"[{self.name}] Auto-spawning '{agent_name}' via catalog...")
1549                        try:
1550                            spawn_result = await catalog_actor._action_spawn(agent_name, {})
1551                            if spawn_result and spawn_result.get("ok"):
1552                                await asyncio.sleep(0.5)
1553                                target = self._registry.find_by_name(agent_name) if self._registry else None
1554                                logger.info(f"[{self.name}] '{agent_name}' spawned successfully")
1555                            else:
1556                                err = spawn_result.get("message", "unknown") if spawn_result else "no response"
1557                                logger.warning(f"[{self.name}] Spawn failed for '{agent_name}': {err}")
1558                        except Exception as e:
1559                            logger.error(f"[{self.name}] Spawn error for '{agent_name}': {e}")
1560
1561            if not target:
1562                replacements.append((full_match, f"[Could not reach {agent_name}]"))
1563                continue
1564
1565            json_str = json.dumps(payload)
1566            logger.info(f"[{self.name}] Executing LLM delegation → @{agent_name} {json_str[:80]}")
1567            try:
1568                result = await self.delegate_task(agent_name, json_str, timeout=300.0)
1569                if result:
1570                    if isinstance(result, dict):
1571                        error = result.get("error")
1572                        if error:
1573                            result_str = f"❌ {agent_name} failed: {error}"
1574                        else:
1575                            for key in ("pptx_path", "image_path", "result", "message", "output", "text"):
1576                                if result.get(key):
1577                                    result_str = f"✅ {agent_name} completed: {key}={result[key]}"
1578                                    break
1579                            else:
1580                                result_str = f"✅ {agent_name} completed: {result}"
1581                    else:
1582                        result_str = f"✅ {agent_name}: {result}"
1583                else:
1584                    result_str = f"[{agent_name} did not respond]"
1585            except Exception as e:
1586                result_str = f"[{agent_name} error: {e}]"
1587
1588            replacements.append((full_match, result_str))
1589
1590        # Apply replacements
1591        for original, replacement in replacements:
1592            response = response.replace(original, replacement)
1593
1594        return response
1595
1596    @staticmethod
1597    def _parse_spawn_config(raw: str) -> dict:
1598        """
1599        Robustly parse a spawn config that may contain raw multiline code strings.
1600        Uses character scanning to correctly handle } and " inside the code value.
1601        """
1602        raw = raw.strip()
1603
1604        # Strategy 1: standard JSON (works when LLM properly escapes newlines)
1605        try:
1606            return json.loads(raw)
1607        except json.JSONDecodeError:
1608            pass
1609
1610        # Strategy 2: backtick-delimited code (rare but some LLMs use it)
1611        bt_match = re.search(r'"code"\s*:\s*`(.*?)`', raw, re.DOTALL)
1612        if bt_match:
1613            code_raw    = bt_match.group(1)
1614            placeholder = re.sub(r'"code"\s*:\s*`.*?`', '"code": "__CODE__"', raw, flags=re.DOTALL)
1615            config      = json.loads(placeholder)
1616            config["code"] = code_raw
1617            return config
1618
1619        # Strategy 3: character scanner — find opening " after "code":
1620        # then scan forward respecting escape sequences to find the real closing "
1621        # This correctly handles } and { inside the code value.
1622        key_match = re.search(r'"code"\s*:\s*"', raw)
1623        if not key_match:
1624            raise ValueError(f"No 'code' key found in spawn config:\n{raw[:200]}")
1625
1626        code_start = key_match.end()   # index right after the opening "
1627        i = code_start
1628        while i < len(raw):
1629            if raw[i] == '\\':
1630                i += 2             # skip escaped character
1631                continue
1632            if raw[i] == '"':
1633                break              # found unescaped closing quote
1634            i += 1
1635
1636        code_raw    = raw[code_start:i]
1637        placeholder = raw[:key_match.start()] + '"code": "__CODE__"' + raw[i+1:]
1638
1639        try:
1640            config = json.loads(placeholder)
1641        except json.JSONDecodeError as e:
1642            raise ValueError(f"Spawn config JSON invalid after code extraction: {e}\nPlaceholder:\n{placeholder[:300]}")
1643
1644        # Unescape sequences the LLM may have added
1645        config["code"] = (code_raw
1646                          .replace("\\n", "\n")
1647                          .replace('\\"', '"')
1648                          .replace("\\t", "\t"))
1649        return config
1650
1651    async def _process_spawn_commands(self, response: str):
1652        spawned = []
1653        pattern = r'<spawn>(.*?)</spawn>'
1654
1655        for match in re.findall(pattern, response, re.DOTALL):
1656            try:
1657                config = self._parse_spawn_config(match.strip())
1658                # LLM agents have no "code" — only check for code if type is dynamic
1659                agent_type = config.get("type", "dynamic")
1660                has_code   = bool(config.get("code", "").strip())
1661                has_prompt = bool(config.get("system_prompt", "").strip())
1662                if agent_type == "dynamic" and not has_code:
1663                    logger.error(f"[{self.name}] Dynamic agent has no code: {config.get('name')}")
1664                    continue
1665                if agent_type == "llm" and not has_prompt:
1666                    logger.warning(f"[{self.name}] LLM agent has no system_prompt, using default: {config.get('name')}")
1667                actor = await self._spawn_from_config(config, save=True)
1668                if actor:
1669                    spawned.append(actor)
1670            except Exception as e:
1671                logger.error(f"[{self.name}] Spawn failed: {e}\nRaw block:\n{match[:500]}")
1672
1673        clean = re.sub(pattern, '', response, flags=re.DOTALL).strip()
1674        return clean, spawned
1675
1676    async def _spawn_from_config(self, config: dict, save: bool = True) -> Optional[Actor]:
1677        name = config.get("name", "dynamic-agent")
1678        node = config.get("node", "").strip()
1679
1680        # Remote spawn — publish to the node's spawn topic via MQTT
1681        if node:
1682            return await self._spawn_remote(config, node, save)
1683
1684        # Local spawn
1685        from .dynamic_agent import DynamicAgent
1686
1687        existing = self._registry.find_by_name(name) if self._registry else None
1688        replace  = config.get("replace", False)
1689
1690        if existing:
1691            if not replace:
1692                logger.info(f"[{self.name}] '{name}' already exists (use replace=true to update).")
1693                return existing
1694            # Stop the old agent cleanly before spawning the replacement
1695            logger.info(f"[{self.name}] Replacing '{name}' with updated code...")
1696            try:
1697                if self._registry:
1698                    await self._registry.unregister(existing.actor_id)
1699                await existing.stop()
1700                await asyncio.sleep(0.5)
1701            except Exception as e:
1702                logger.warning(f"[{self.name}] Error stopping old '{name}': {e}")
1703
1704        agent_type    = config.get("type", "dynamic")
1705        code          = config.get("code", "").strip()
1706        system_prompt = config.get("system_prompt", "").strip()
1707
1708        # Route to the right agent class
1709        if agent_type == "ha_actuator":
1710            actor = await self._spawn_ha_actuator(config, name)
1711        elif agent_type == "manual" or name == "manual-agent":
1712            actor = await self._spawn_manual_agent(config, name)
1713        elif agent_type == "llm" or (not code and system_prompt):
1714            actor = await self._spawn_llm_agent(config, name)
1715        elif code:
1716            actor = await self._spawn_dynamic_agent(config, name, code)
1717        else:
1718            logger.warning(f"[{self.name}] Spawn config for '{name}' has neither code nor system_prompt.")
1719            return None
1720
1721        if actor and save:
1722            self._save_to_spawn_registry(config)
1723
1724        return actor
1725
1726    async def _spawn_ha_actuator(self, config: dict, name: str):
1727        """Spawn a HomeAssistantActuatorAgent from a spawn block with type: ha_actuator."""
1728        from .home_assistant_actuator_agent import (
1729            HomeAssistantActuatorAgent, ActuatorConfig, ActuatorAction, ActuatorCondition,
1730        )
1731        import hashlib as _hl
1732
1733        # Ensure unique name if collision
1734        if self._registry and self._registry.find_by_name(name):
1735            suffix = _hl.md5(f"{name}{__import__('time').time()}".encode()).hexdigest()[:4]
1736            name   = f"{name}-{suffix}"
1737
1738        automation_id = config.get("automation_id", name)
1739        actuator_cfg  = ActuatorConfig(
1740            automation_id    = automation_id,
1741            description      = config.get("description", ""),
1742            mqtt_topics      = config.get("mqtt_topics", []),
1743            actions          = [ActuatorAction.from_dict(a) for a in config.get("actions", [])],
1744            conditions       = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])],
1745            detection_filter = config.get("detection_filter"),
1746            cooldown_seconds = float(config.get("cooldown_seconds", 10.0)),
1747        )
1748        logger.info(f"[{self.name}] Spawning HomeAssistantActuatorAgent '{name}'")
1749        actor = await self.spawn(
1750            HomeAssistantActuatorAgent,
1751            config          = actuator_cfg,
1752            name            = name,
1753            persistence_dir = str(self._persistence_dir.parent),
1754        )
1755        return actor
1756
1757    async def _spawn_manual_agent(self, config: dict, name: str):
1758        """Spawn the pre-defined ManualAgent — robust PDF manual search and Q&A."""
1759        from .manual_agent import ManualAgent
1760        logger.info(f"[{self.name}] Spawning ManualAgent '{name}'")
1761        actor = await self.spawn(
1762            ManualAgent,
1763            name=name,
1764            llm_provider=self.llm,
1765            persistence_dir=str(self._persistence_dir.parent),
1766        )
1767        return actor
1768
1769    async def _spawn_llm_agent(self, config: dict, name: str):
1770        """Spawn a proper LLMAgent — best for chat, Q&A, reasoning tasks."""
1771        from .llm_agent import LLMAgent
1772        system_prompt = config.get("system_prompt", "You are a helpful assistant.")
1773        logger.info(f"[{self.name}] Spawning LLM agent '{name}'")
1774        actor = await self.spawn(
1775            LLMAgent,
1776            name=name,
1777            llm_provider=self.llm,
1778            system_prompt=system_prompt,
1779            persistence_dir=str(self._persistence_dir.parent),
1780        )
1781        return actor
1782
1783    async def _spawn_dynamic_agent(self, config: dict, name: str, code: str):
1784        """Spawn a DynamicAgent — best for data pipelines, sensors, tools."""
1785        packages = config.get("install", [])
1786        if isinstance(packages, str):
1787            packages = [p.strip() for p in packages.replace(",", " ").split()]
1788
1789        if packages:
1790            # Install and spawn in a background task so we don't block the user
1791            logger.info(f"[{self.name}] Scheduling background install+spawn for '{name}': {packages}")
1792            asyncio.create_task(self._install_then_spawn(config, name, code, packages))
1793            # Return a placeholder so the caller knows spawn is in progress
1794            return _SpawnPlaceholder(name)
1795        else:
1796            return await self._do_spawn_dynamic(config, name, code)
1797
1798    async def _install_then_spawn(self, config: dict, name: str, code: str, packages: list):
1799        """Background task: install packages then spawn the agent."""
1800        try:
1801            await self._mqtt_publish(
1802                f"agents/{self.actor_id}/logs",
1803                {"type": "log", "message": f"Installing {packages} for {name}...", "timestamp": __import__("time").time()},
1804            )
1805            await self._install_packages(packages)
1806            actor = await self._do_spawn_dynamic(config, name, code)
1807            if actor:
1808                self._save_to_spawn_registry(config)
1809                await self._mqtt_publish(
1810                    f"agents/{self.actor_id}/logs",
1811                    {"type": "spawned", "message": f"'{name}' spawned after install", "child_name": name, "timestamp": __import__("time").time()},
1812                )
1813                logger.info(f"[{self.name}] Background spawn complete: {name}")
1814        except Exception as e:
1815            logger.error(f"[{self.name}] Background install+spawn failed for '{name}': {e}")
1816
1817    async def _do_spawn_dynamic(self, config: dict, name: str, code: str):
1818        """Actually create and start the DynamicAgent."""
1819        from .dynamic_agent import DynamicAgent
1820        actor = await self.spawn(
1821            DynamicAgent,
1822            name=name,
1823            code=code,
1824            poll_interval=float(config.get("poll_interval", 1.0)),
1825            description=config.get("description", ""),
1826            input_schema=config.get("input_schema", {}),
1827            output_schema=config.get("output_schema", {}),
1828            llm_provider=self.llm,
1829            persistence_dir=str(self._persistence_dir.parent),
1830        )
1831        return actor
1832
1833    async def _install_packages(self, packages: list[str]):
1834        """Delegate package installation to the installer agent."""
1835        if not self._registry:
1836            return
1837
1838        # Fast path: check which packages actually need installing
1839        import importlib, sys
1840        needed = []
1841        for pkg in packages:
1842            import_name = pkg.replace("-", "_").split("[")[0]
1843            try:
1844                importlib.import_module(import_name)
1845            except ImportError:
1846                needed.append(pkg)
1847        if not needed:
1848            logger.info(f"[{self.name}] All packages already available: {packages} — skipping install")
1849            return
1850
1851        installer = self._registry.find_by_name("installer")
1852        if not installer:
1853            logger.warning(f"[{self.name}] installer agent not found — skipping install of {needed}")
1854            return
1855        logger.info(f"[{self.name}] Installing packages via installer: {needed}")
1856        import uuid
1857        task_id = f"install_{uuid.uuid4().hex[:8]}"
1858        future = asyncio.get_event_loop().create_future()
1859        self._result_futures[task_id] = future
1860        await self.send(installer.actor_id, MessageType.TASK, {
1861            "action": "install",
1862            "packages": needed,
1863            "task": task_id,
1864            "_task_id": task_id,
1865            "reply_to": self.actor_id,
1866        })
1867        try:
1868            result = await asyncio.wait_for(future, timeout=120.0)
1869            logger.info(f"[{self.name}] Install result: {result.get('message', result)}")
1870            if result.get("failed"):
1871                logger.warning(f"[{self.name}] Failed to install: {result['failed']}")
1872        except asyncio.TimeoutError:
1873            logger.warning(f"[{self.name}] Package install timed out for {needed}")
1874        finally:
1875            self._result_futures.pop(task_id, None)
1876
1877    async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
1878        """
1879        Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline.
1880        Returns the final synthesised result without blocking main's context.
1881
1882        Usage:
1883            result = await main.run_pipeline(
1884                goal="Find the Philips EP2220 manual and answer: how do I descale it?",
1885                agents=["manual-agent", "installer"]
1886            )
1887        """
1888        from .task_manager import TaskManager
1889        import uuid
1890
1891        task_id = uuid.uuid4().hex[:8]
1892        future  = asyncio.get_event_loop().create_future()
1893        self._result_futures[task_id] = future
1894
1895        mgr = await self.spawn(
1896            TaskManager,
1897            goal=goal,
1898            available_agents=agents,
1899            llm_provider=self.llm,
1900            reply_to_id=self.actor_id,
1901            reply_task_id=task_id,
1902            auto_destroy=True,
1903            force_replan=force_replan,
1904            cache_dir=str(self._persistence_dir.parent / "plan_cache"),
1905            persistence_dir=str(self._persistence_dir.parent),
1906        )
1907
1908        logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}")
1909
1910        try:
1911            result = await asyncio.wait_for(future, timeout=timeout)
1912            return result
1913        except asyncio.TimeoutError:
1914            logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s")
1915            return {"error": f"Pipeline timed out after {timeout}s"}
1916        finally:
1917            self._result_futures.pop(task_id, None)
1918
1919    async def _spawn_remote(self, config: dict, node: str, save: bool) -> None:
1920        """
1921        Publish a spawn command to a remote node via MQTT.
1922        The remote_runner.py on that machine will receive it and run the agent.
1923        Remote agents appear in the dashboard exactly like local ones
1924        because they connect to the same MQTT broker.
1925
1926        Also updates nodes/{node}/desired_state (retained) with ALL agents for
1927        this node so the runner can self-heal after a reboot.
1928        """
1929        name = config.get("name", "remote-agent")
1930        logger.info(f"[{self.name}] Spawning '{name}' on remote node '{node}'")
1931
1932        # Publish individual spawn (for immediate delivery)
1933        await self._mqtt_publish(
1934            f"nodes/{node}/spawn",
1935            config,
1936            retain=True,
1937            qos=1,
1938        )
1939
1940        # Update desired state for the whole node (retained — survives Pi reboot)
1941        await self._update_node_desired_state(node, config)
1942
1943        await self._mqtt_publish(
1944            f"agents/{self.actor_id}/logs",
1945            {"type": "spawned", "message": f"Spawned '{name}' on node '{node}'",
1946             "child_name": name, "node": node, "timestamp": __import__("time").time()}
1947        )
1948
1949        if save:
1950            self._save_to_spawn_registry(config)
1951
1952        return None
1953
1954    async def _update_node_desired_state(self, node: str, new_config: dict = None,
1955                                          remove_name: str = None) -> None:
1956        """
1957        Maintain nodes/{node}/desired_state as a retained MQTT message containing
1958        ALL agents that should run on this node. The runner reads this on startup
1959        and reconciles — spawning missing agents, ignoring already-running ones.
1960        """
1961        # Build desired state from spawn registry filtered to this node
1962        reg = self._get_spawn_registry()
1963        agents = {
1964            name: cfg for name, cfg in reg.items()
1965            if cfg.get("node", "").strip() == node
1966        }
1967
1968        # Apply pending change before publishing
1969        if new_config:
1970            agents[new_config["name"]] = new_config
1971        if remove_name:
1972            agents.pop(remove_name, None)
1973
1974        await self._mqtt_publish(
1975            f"nodes/{node}/desired_state",
1976            {"node": node, "agents": list(agents.values()),
1977             "timestamp": __import__("time").time()},
1978            retain=True,
1979            qos=1,
1980        )
1981        logger.info(f"[{self.name}] Desired state for '{node}': {list(agents.keys())}")
1982
1983    # ── Node registry ──────────────────────────────────────────────────────
1984
1985    def list_nodes(self) -> list[dict]:
1986        """Return all known remote nodes with their last-seen time and running agents."""
1987        import time as _time
1988        now = _time.time()
1989        return [
1990            {
1991                "node":      name,
1992                "agents":    info.get("agents", []),
1993                "last_seen": info.get("last_seen", 0),
1994                "online":    (now - info.get("last_seen", 0)) < 30,
1995            }
1996            for name, info in self._known_nodes.items()
1997        ]
1998
1999    def list_topics(self, keyword: str = "") -> list[dict]:
2000        """
2001        Return all known MQTT topics published by agents, optionally filtered by keyword.
2002        Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}
2003
2004        Example:
2005            list_topics("cpu")     → topics containing "cpu"
2006            list_topics("temp")    → topics containing "temp"
2007            list_topics()          → all topics
2008        """
2009        results = []
2010        kw = keyword.lower()
2011        for topic, manifests in self._topic_registry.items():
2012            if kw and kw not in topic.lower():
2013                continue
2014            results.append({
2015                "topic":   topic,
2016                "agents":  [{"name": m.get("name"), "node": m.get("node"),
2017                             "description": m.get("description", "")} for m in manifests],
2018            })
2019        return sorted(results, key=lambda x: x["topic"])
2020
2021    def list_capabilities(self, keyword: str = "") -> list[dict]:
2022        """
2023        Return all known agents with their full capability profile:
2024        name, description, capabilities, input_schema, output_schema.
2025
2026        Example:
2027            list_capabilities()            → all agents
2028            list_capabilities("weather")   → agents with "weather" in description/capabilities
2029        """
2030        results = []
2031        kw = keyword.lower().strip()
2032        # Support multi-word keywords — match if ANY word appears in the haystack
2033        kw_words = kw.split() if kw else []
2034        for name, manifest in self._agent_manifests.items():
2035            desc  = manifest.get("description", "")
2036            caps  = manifest.get("capabilities", [])
2037            # Filter by keyword across description, capabilities, and name
2038            if kw_words:
2039                haystack = desc.lower() + " " + " ".join(caps).lower() + " " + name.lower()
2040                if not any(w in haystack for w in kw_words):
2041                    continue
2042            results.append({
2043                "name":          name,
2044                "node":          manifest.get("node"),
2045                "description":   desc,
2046                "capabilities":  caps,
2047                "input_schema":  manifest.get("input_schema",  {}),
2048                "output_schema": manifest.get("output_schema", {}),
2049                "spawnable":     manifest.get("spawnable", False),
2050                "running":       bool(self._registry and self._registry.find_by_name(name)),
2051            })
2052        return sorted(results, key=lambda x: x["name"])
2053
2054    async def _manifest_listener(self):
2055        """
2056        Subscribe to agents/+/manifest and build a searchable topic registry.
2057        Retained manifests are delivered immediately on subscribe so the registry
2058        is populated even for agents that started before main restarted.
2059        """
2060        try:
2061            import aiomqtt
2062        except ImportError:
2063            return
2064
2065        while self.state.value not in ("stopped", "failed"):
2066            try:
2067                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
2068                    await client.subscribe("agents/+/manifest")
2069                    logger.info("[main] Subscribed to agent manifests.")
2070                    async for msg in client.messages:
2071                        try:
2072                            data = json.loads(msg.payload.decode())
2073                        except Exception:
2074                            continue
2075                        if not isinstance(data, dict):
2076                            continue
2077                        agent_name = data.get("name", "?")
2078                        published  = data.get("publishes", [])
2079                        # Update topic registry
2080                        for topic in published:
2081                            existing = self._topic_registry.setdefault(topic, [])
2082                            # Replace existing entry for this agent or append
2083                            updated = False
2084                            for i, m in enumerate(existing):
2085                                if m.get("name") == agent_name:
2086                                    existing[i] = data
2087                                    updated = True
2088                                    break
2089                            if not updated:
2090                                existing.append(data)
2091                        # Also store full manifest by agent name for capability queries
2092                        self._agent_manifests[agent_name] = data
2093                        logger.debug(f"[main] Manifest from '{agent_name}': {published}")
2094            except asyncio.CancelledError:
2095                break
2096            except Exception as e:
2097                if self.state.value not in ("stopped", "failed"):
2098                    logger.warning(f"[main] Manifest listener error: {e}. Reconnecting in 5s…")
2099                    await asyncio.sleep(5)
2100
2101    async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
2102        """
2103        Move a running agent to a different node.
2104
2105        If the agent is local: saves updated config (with new node) and re-spawns remotely.
2106        If the agent is remote: publishes a migrate command to its current node.
2107        Returns {"success": bool, "message": str}
2108        """
2109        import time as _time
2110
2111        reg = self._get_spawn_registry()
2112        config = reg.get(agent_name)
2113        if not config:
2114            return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."}
2115
2116        current_node = config.get("node", "").strip()
2117
2118        if current_node == target_node:
2119            return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."}
2120
2121        if current_node:
2122            # ── Remote → Remote migration ────────────────────────────────────
2123            logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'")
2124            await self._mqtt_publish(
2125                f"nodes/{current_node}/migrate",
2126                {"name": agent_name, "target_node": target_node},
2127            )
2128        else:
2129            # ── Local → Remote migration ─────────────────────────────────────
2130            logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'")
2131
2132            # Stop the local instance
2133            if self._registry:
2134                local = self._registry.find_by_name(agent_name)
2135                if local:
2136                    try:
2137                        await self._registry.unregister(local.actor_id)
2138                        await local.stop()
2139                        await asyncio.sleep(0.3)
2140                    except Exception as e:
2141                        logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}")
2142
2143            # Update config with new node target and re-spawn remotely
2144            new_config = dict(config)
2145            new_config["node"] = target_node
2146            new_config.pop("replace", None)
2147
2148            await self._spawn_remote(new_config, target_node, save=True)
2149
2150        # Update spawn registry so next restart re-spawns to the right node
2151        updated = dict(config)
2152        updated["node"] = target_node
2153        self._save_to_spawn_registry(updated)
2154
2155        msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' "
2156               f"→ '{target_node}'. It will appear in the dashboard shortly.")
2157        logger.info(f"[{self.name}] {msg}")
2158        return {"success": True, "message": msg}
2159
2160    async def _node_heartbeat_listener(self):
2161        """
2162        Subscribe to nodes/+/heartbeat so main knows which remote nodes are online.
2163        Updates self._known_nodes which is used by list_nodes() and the LLM context.
2164        """
2165        try:
2166            import aiomqtt
2167        except ImportError:
2168            logger.warning("[main] aiomqtt not available — node heartbeat tracking disabled.")
2169            return
2170
2171        while self.state.value not in ("stopped", "failed"):
2172            try:
2173                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
2174                    await client.subscribe("nodes/+/heartbeat")
2175                    await client.subscribe("nodes/+/migrate_result")
2176                    logger.info("[main] Subscribed to node heartbeats.")
2177                    async for msg in client.messages:
2178                        topic = str(msg.topic)
2179                        try:
2180                            data = json.loads(msg.payload.decode())
2181                        except Exception:
2182                            continue
2183
2184                        parts = topic.split("/")
2185                        if len(parts) < 3:
2186                            continue
2187                        node_name = parts[1]
2188
2189                        if topic.endswith("/heartbeat"):
2190                            import time as _t
2191                            self._known_nodes[node_name] = {
2192                                "last_seen": _t.time(),
2193                                "agents":   data.get("agents", []),
2194                                "node_id":  data.get("node_id", ""),
2195                            }
2196                        elif topic.endswith("/migrate_result"):
2197                            success = data.get("success", False)
2198                            agent   = data.get("agent", "?")
2199                            to_node = data.get("to_node", "?")
2200                            sev     = "info" if success else "warning"
2201                            self._pending_notifications.append({
2202                                "_monitor_notification": True,
2203                                "message": (
2204                                    f"Migration of '{agent}' to '{to_node}' succeeded."
2205                                    if success else
2206                                    f"Migration of '{agent}' failed: {data.get('error', '?')}"
2207                                ),
2208                                "severity": sev,
2209                                "timestamp": __import__("time").time(),
2210                            })
2211
2212            except asyncio.CancelledError:
2213                break
2214            except Exception as e:
2215                if self.state.value not in ("stopped", "failed"):
2216                    logger.warning(f"[main] Node heartbeat listener error: {e}. Reconnecting in 5s…")
2217                    await asyncio.sleep(5)
2218
2219    # ── Delegation ─────────────────────────────────────────────────────────
2220
2221    async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
2222        """
2223        Send a task to the installer agent and wait for the result.
2224        Handles node_deploy, node_install, node_run, install, check actions.
2225        timeout is generous (300s) because deploys involve SSH + pip installs.
2226        """
2227        if not self._registry:
2228            return {"error": "No registry available"}
2229        installer = self._registry.find_by_name("installer")
2230        if not installer:
2231            return {"error": "installer agent not found"}
2232
2233        import uuid as _uuid
2234        task_id = f"inst_{_uuid.uuid4().hex[:8]}"
2235        future: asyncio.Future = asyncio.get_event_loop().create_future()
2236        self._result_futures[task_id] = future
2237
2238        payload = dict(payload)
2239        payload["_task_id"] = task_id
2240        payload["task"]     = task_id
2241
2242        await self.send(installer.actor_id, MessageType.TASK, payload)
2243        try:
2244            return await asyncio.wait_for(future, timeout=timeout)
2245        except asyncio.TimeoutError:
2246            return {"error": f"Installer timed out after {timeout}s"}
2247        finally:
2248            self._result_futures.pop(task_id, None)
2249
2250    async def delegate_task(self, target_name: str, task: str, timeout: float = 60.0) -> Optional[dict]:
2251        if not self._registry:
2252            return None
2253        target = self._registry.find_by_name(target_name)
2254        if not target:
2255            return None
2256        future = asyncio.get_event_loop().create_future()
2257        self._result_futures[task] = future
2258        await self.send(target.actor_id, MessageType.TASK, {"text": task, "reply_to": self.actor_id})
2259        try:
2260            return await asyncio.wait_for(future, timeout=timeout)
2261        except asyncio.TimeoutError:
2262            return None
2263        finally:
2264            self._result_futures.pop(task, None)
2265
2266    async def list_agents(self) -> list[dict]:
2267        if not self._registry:
2268            return []
2269        return [a.get_status() for a in self._registry.all_actors()]
2270
2271    async def send_command(self, target_name: str, command: MessageType):
2272        if not self._registry:
2273            return
2274        target = self._registry.find_by_name(target_name)
2275        if target:
2276            await self.send(target.actor_id, command)
2277
2278    async def delete_spawned_agent(self, name: str):
2279        # Find node before removing from registry
2280        reg = self._get_spawn_registry()
2281        node = reg.get(name, {}).get("node", "").strip()
2282
2283        self._remove_from_spawn_registry(name)
2284
2285        # Update desired state so Pi doesn't re-spawn on reconcile
2286        if node:
2287            await self._update_node_desired_state(node, remove_name=name)
2288            await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1)
2289
2290        if self._registry:
2291            target = self._registry.find_by_name(name)
2292            if target:
2293                await self._registry.unregister(target.actor_id)
2294                await target.stop()

An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.

MainActor( llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, **kwargs)
666    def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs):
667        kwargs.setdefault("name", "main")
668        kwargs.setdefault("system_prompt", ORCHESTRATOR_PROMPT)
669        super().__init__(llm_provider=llm_provider, **kwargs)
670        self._result_futures: dict[str, asyncio.Future] = {}
671        # Queued monitor notifications — prepended to next user response
672        self._pending_notifications: list[dict] = []
673        self.protected = True
674        # Remote node tracking: node_name → {"last_seen": float, "agents": [...]}
675        self._known_nodes: dict[str, dict] = {}
676        # Topic registry: topic → [manifest, ...] — built from agents/+/manifest
677        self._topic_registry: dict[str, list] = {}  # topic → list of agent manifests
678        self._agent_manifests: dict[str, dict] = {}  # agent name → latest manifest (includes schemas)
DESCRIPTION = 'Main orchestrator: spawns agents, routes tasks, manages the multi-agent system'
CAPABILITIES = ['spawn_agent', 'list_agents', 'list_nodes', 'list_topics', 'orchestration']
INTENT_CLASSIFIER_PROMPT = "You are a routing classifier for a smart home AI assistant.\nRespond with exactly one token: HA, PIPELINE, or OTHER.\n\nHA = a direct, one-shot Home Assistant action or query:\n - Turn on/off a device right now\n - List devices, areas, entities, automations\n - Create/edit/delete a HA automation\n - Set temperature, dim lights, lock door — immediate action\n\nPIPELINE = a reactive rule that should run continuously:\n - 'if X happens then do Y' — any conditional/reactive logic\n - 'when X send me a message/notification'\n - 'whenever X turns on/off do Y'\n - Any rule involving a sensor state change triggering an action or notification\n - Any webcam/camera detection triggering anything\n - Anything involving Discord/Telegram notifications triggered by an event\n\nOTHER = general conversation, coding, questions, anything not HA or pipeline related."
protected
async def on_start(self):
682    async def on_start(self):
683        await super().on_start()
684        await self._restore_spawned_agents()
685        # Listen for remote node heartbeats so we know what's online
686        self._tasks.append(asyncio.create_task(self._node_heartbeat_listener()))
687        # Listen for agent capability manifests to build topic registry
688        self._tasks.append(asyncio.create_task(self._manifest_listener()))
689        # Inject persisted user facts into system prompt
690        self._inject_user_facts_into_prompt()

Called when actor starts. Override for init logic.

def get_pipeline_rules(self) -> dict:
714    def get_pipeline_rules(self) -> dict:
715        return self.recall(PIPELINE_RULES_KEY) or {}
def save_pipeline_rule(self, rule: dict):
717    def save_pipeline_rule(self, rule: dict):
718        rules = self.get_pipeline_rules()
719        rules[rule["rule_id"]] = rule
720        self.persist(PIPELINE_RULES_KEY, rules)
721        logger.info(f"[{self.name}] Pipeline rule saved: {rule['rule_id']} agents={rule.get('agents', [])}")
def get_notification_urls(self) -> dict:
723    def get_notification_urls(self) -> dict:
724        """Return persisted notification webhook URLs (discord, telegram, slack, etc.)"""
725        return self.recall("_notification_urls") or {}

Return persisted notification webhook URLs (discord, telegram, slack, etc.)

def get_user_facts(self) -> dict:
743    def get_user_facts(self) -> dict:
744        return self.recall("_user_facts") or {}
async def delete_pipeline_rule(self, rule_id: str) -> str:
787    async def delete_pipeline_rule(self, rule_id: str) -> str:
788        """Stop all agents for a rule and remove it from registry."""
789        rules = self.get_pipeline_rules()
790        rule = rules.get(rule_id)
791        if not rule:
792            return f"No rule found with id '{rule_id}'."
793        agents = rule.get("agents", [])
794        stopped = []
795        for agent_name in agents:
796            self._remove_from_spawn_registry(agent_name)
797            if self._registry:
798                actor = self._registry.find_by_name(agent_name)
799                if actor:
800                    await actor.stop()
801                    await self._registry.unregister(actor.actor_id)
802                    stopped.append(agent_name)
803        del rules[rule_id]
804        self.persist(PIPELINE_RULES_KEY, rules)
805        task_preview = rule.get("task", "")[:60]
806        return f"Rule '{rule_id}' deleted. Stopped agents: {', '.join(stopped) or 'none running'}.\nRule was: {task_preview}"

Stop all agents for a rule and remove it from registry.

async def handle_message(self, msg: Message):
834    async def handle_message(self, msg: Message):
835        if msg.type == MessageType.TASK:
836            # Intercept monitor notifications BEFORE passing to LLM _handle_task
837            if isinstance(msg.payload, dict) and msg.payload.get("_monitor_notification"):
838                self._pending_notifications.append(msg.payload)
839                logger.info(f"[{self.name}] Monitor alert queued: {msg.payload.get('message','')[:80]}")
840                return
841            await self._handle_task(msg)
842
843        elif msg.type == MessageType.RESULT:
844            if isinstance(msg.payload, dict):
845                # Support both key names: "_task_id" (new) and "task" (legacy)
846                fid = msg.payload.get("_task_id") or msg.payload.get("task")
847                if fid and fid in self._result_futures:
848                    fut = self._result_futures[fid]
849                    if not fut.done():
850                        fut.set_result(msg.payload)

Handle messages not caught by default handlers.

async def chat(self, user_message: str) -> str:
930    async def chat(self, user_message: str) -> str:
931        response = await super().chat(user_message)
932        # Fire-and-forget fact extraction — don't block the response
933        asyncio.create_task(self._extract_and_save_facts(user_message, response))
934        return response

Direct async call - useful for the main conversation actor.

async def chat_stream(self, user_message: str):
936    async def chat_stream(self, user_message: str):
937        full_response = []
938        async for chunk in super().chat_stream(user_message):
939            if isinstance(chunk, dict):
940                yield chunk
941            else:
942                full_response.append(chunk)
943                yield chunk
944        # Extract facts from completed response
945        if full_response:
946            asyncio.create_task(
947                self._extract_and_save_facts(user_message, "".join(full_response))
948            )

Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.

Usage:

    async for chunk in agent.chat_stream("hello"):
        if isinstance(chunk, dict):
            usage = chunk  # final usage summary
        else:
            print(chunk, end="", flush=True)

async def process_user_input(self, text: str) -> str:
 963    async def process_user_input(self, text: str) -> str:
 964        note_prefix = self._drain_notifications()
 965
 966        # ── Direct API intercepts — handle without LLM round-trip ──────────
 967        stripped = text.strip().rstrip("()")
 968        if stripped in ("main.list_nodes", "list_nodes", "/nodes"):
 969            nodes = self.list_nodes()
 970            if not nodes:
 971                return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>."
 972            import time as _t
 973            lines = []
 974            for nd in sorted(nodes, key=lambda x: x["node"]):
 975                status   = "🟢 online" if nd["online"] else "🔴 offline"
 976                agents   = ", ".join(nd["agents"]) or "(no agents)"
 977                age      = int(_t.time() - nd["last_seen"])
 978                lines.append(f"  {nd['node']:22s} {status}  |  agents: {agents}  |  last heartbeat: {age}s ago")
 979            return note_prefix + "Remote nodes:\n" + "\n".join(lines)
 980
 981        if stripped.startswith("/topics"):
 982            keyword = stripped[7:].strip().lstrip("(").rstrip(")")
 983            topics = self.list_topics(keyword)
 984            if not topics:
 985                msg = f"No topics found" + (f" matching '{keyword}'" if keyword else "") + "."
 986                msg += " Topics are registered automatically when agents publish for the first time."
 987                return note_prefix + msg
 988            lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"]
 989            for t in topics:
 990                agent_strs = ", ".join(
 991                    f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "")
 992                    for a in t["agents"]
 993                )
 994                lines.append(f"  {t['topic']:40s}{agent_strs}")
 995            return note_prefix + "\n".join(lines)
 996
 997        # ── Webhook / notification URL management ───────────────────────────
 998        if stripped.startswith("/memory"):
 999            parts = stripped.split(None, 1)
1000            sub = parts[1].strip() if len(parts) > 1 else ""
1001            if sub == "clear":
1002                self.persist("_user_facts", {})
1003                self.persist("history_summary", "")
1004                self._history_summary = ""
1005                self.system_prompt = ORCHESTRATOR_PROMPT
1006                return note_prefix + "Memory cleared — user facts and conversation summary reset."
1007            if sub.startswith("forget "):
1008                key = sub[7:].strip()
1009                facts = self.get_user_facts()
1010                if key in facts:
1011                    del facts[key]
1012                    self.persist("_user_facts", facts)
1013                    self._inject_user_facts_into_prompt()
1014                    return note_prefix + f"Forgotten: '{key}'"
1015                return note_prefix + f"No fact found with key '{key}'."
1016            # Default: show memory
1017            facts = self.get_user_facts()
1018            summary = self._history_summary
1019            lines = []
1020            if facts:
1021                lines.append(f"User facts ({len(facts)}):")
1022                for k, v in facts.items():
1023                    lines.append(f"  {k}: {v}")
1024            else:
1025                lines.append("No user facts stored yet.")
1026            if summary:
1027                lines.append(f"\nConversation summary:\n  {summary[:300]}{'...' if len(summary) > 300 else ''}")
1028            else:
1029                lines.append("\nNo conversation summary yet.")
1030            lines.append("\nCommands: /memory clear | /memory forget <key>")
1031            return note_prefix + "\n".join(lines)
1032
1033        if stripped.startswith("/webhook"):
1034            parts = stripped.split(None, 2)
1035            if len(parts) == 1:
1036                # /webhook — show stored URLs
1037                urls = self.recall("_notification_urls") or {}
1038                if not urls:
1039                    return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url>  or  /webhook telegram <url>"
1040                lines = ["Stored notification URLs:"]
1041                for svc, url in urls.items():
1042                    lines.append(f"  {svc}: {url}")
1043                return note_prefix + "\n".join(lines)
1044            elif len(parts) >= 3:
1045                # /webhook discord <url>
1046                service = parts[1].lower()
1047                url = parts[2].strip()
1048                urls = self.recall("_notification_urls") or {}
1049                urls[service] = url
1050                self.persist("_notification_urls", urls)
1051                return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically."
1052            else:
1053                return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..."
1054
1055        # Auto-detect webhook URLs in any message and persist them
1056        import re as _re
1057        _webhook_match = _re.search(
1058            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
1059            text
1060        )
1061        if _webhook_match:
1062            url = _webhook_match.group(0).rstrip(".,;!)'\"")
1063            urls = self.recall("_notification_urls") or {}
1064            if "discord" in url:
1065                urls["discord"] = url
1066            elif "slack" in url:
1067                urls["slack"] = url
1068            elif "telegram" in url:
1069                urls["telegram"] = url
1070            self.persist("_notification_urls", urls)
1071            logger.info(f"[{self.name}] Auto-saved webhook URL from message")
1072
1073        if stripped in ("/rules", "rules"):
1074            rules = self.get_pipeline_rules()
1075            if not rules:
1076                return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'."
1077            lines = [f"Active pipeline rules ({len(rules)}):"]
1078            for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)):
1079                agents = rule.get("agents", [])
1080                task = rule.get("task", "")[:80]
1081                import datetime
1082                ts = rule.get("created_at", 0)
1083                created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown"
1084                # Check which agents are running
1085                running_agents = []
1086                stopped_agents = []
1087                for a in agents:
1088                    if self._registry and self._registry.find_by_name(a):
1089                        running_agents.append(a)
1090                    else:
1091                        stopped_agents.append(a)
1092                status = "🟢" if running_agents else "🔴"
1093                lines.append(f"\n{status} [{rule_id}] — {task}")
1094                lines.append(f"   agents  : {', '.join(agents)}")
1095                if stopped_agents:
1096                    lines.append(f"   stopped : {', '.join(stopped_agents)}")
1097                lines.append(f"   created : {created}")
1098            lines.append("\nTo delete a rule: /rules delete <rule_id>")
1099            return note_prefix + "\n".join(lines)
1100
1101        if stripped.startswith("/rules delete "):
1102            rule_id = stripped[len("/rules delete "):].strip()
1103            result = await self.delete_pipeline_rule(rule_id)
1104            return note_prefix + result
1105
1106        if stripped.startswith("/rules"):
1107            keyword = stripped[14:].strip().lstrip("(").rstrip(")")
1108            caps = self.list_capabilities(keyword)
1109            if not caps:
1110                msg = "No agents found" + (f" matching '{keyword}'" if keyword else "") + "."
1111                msg += " Agents publish their capabilities on startup."
1112                return note_prefix + msg
1113            lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"]
1114            for a in caps:
1115                lines.append("")
1116                lines.append("  [" + a["name"] + "]" + (" on " + a["node"] if a.get("node") else ""))
1117                lines.append("    description : " + a["description"])
1118                if a["capabilities"]:
1119                    lines.append("    capabilities: " + ", ".join(a["capabilities"]))
1120                if a["input_schema"]:
1121                    lines.append("    input       : " + str(a["input_schema"]))
1122                if a["output_schema"]:
1123                    lines.append("    output      : " + str(a["output_schema"]))
1124            return note_prefix + "\n".join(lines)
1125
1126                # ── @mention direct routing ─────────────────────────────────────────
1127        if text.startswith("@"):
1128            # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?"
1129            parts       = text.split(None, 1)
1130            target_name = parts[0].lstrip("@").rstrip(":,")
1131            message     = parts[1].strip() if len(parts) > 1 else text
1132
1133            # Try local registry first
1134            local_target = self._registry.find_by_name(target_name) if self._registry else None
1135            if not local_target:
1136                # Not running — check if it's a spawnable catalog recipe
1137                manifest = self._agent_manifests.get(target_name, {})
1138                if manifest.get("spawnable") and manifest.get("catalog"):
1139                    catalog_name  = manifest["catalog"]
1140                    catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None
1141                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1142                        logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...")
1143                        try:
1144                            spawn_result = await catalog_actor._action_spawn(target_name, {})
1145                            if spawn_result and spawn_result.get("ok"):
1146                                await asyncio.sleep(0.5)
1147                                local_target = self._registry.find_by_name(target_name) if self._registry else None
1148                                logger.info(f"[main] '{target_name}' spawned, routing task...")
1149                            else:
1150                                err = spawn_result.get("message", "unknown error") if spawn_result else "no response"
1151                                return note_prefix + f"Could not spawn '{target_name}': {err}"
1152                        except Exception as e:
1153                            return note_prefix + f"Could not spawn '{target_name}': {e}"
1154
1155            if local_target:
1156                result = await self.delegate_task(target_name, message, timeout=60.0)
1157                if result:
1158                    reply = result.get("result") or result.get("response") or str(result)
1159                    return note_prefix + f"**{target_name}**: {reply}"
1160                return note_prefix + f"{target_name} did not respond."
1161
1162            # Check if it's a known remote agent
1163            remote_node = None
1164            for node_name, nd in self._known_nodes.items():
1165                if target_name in nd.get("agents", []):
1166                    remote_node = node_name
1167                    break
1168
1169            if remote_node:
1170                # Send via MQTT and wait for reply
1171                import time as _t
1172                reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}"
1173                future: asyncio.Future = asyncio.get_event_loop().create_future()
1174                self._result_futures[reply_topic] = future
1175
1176                await self._mqtt_publish(
1177                    f"agents/by-name/{target_name}/task",
1178                    {"text": message, "_reply_topic": reply_topic,
1179                     "_remote_task": True, "payload": message},
1180                )
1181
1182                # Subscribe briefly for the reply
1183                async def _wait_reply():
1184                    try:
1185                        import aiomqtt
1186                        async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
1187                            await client.subscribe(reply_topic)
1188                            async for msg in client.messages:
1189                                try:
1190                                    data = json.loads(msg.payload.decode())
1191                                    if not future.done():
1192                                        future.set_result(data)
1193                                except Exception:
1194                                    pass
1195                                return
1196                    except Exception as e:
1197                        if not future.done():
1198                            future.set_exception(e)
1199
1200                reply_task = asyncio.create_task(_wait_reply())
1201                try:
1202                    result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0)
1203                    reply_task.cancel()
1204                    reply = result.get("result") or result.get("response") or str(result)
1205                    return note_prefix + f"**{target_name}** (on {remote_node}): {reply}"
1206                except asyncio.TimeoutError:
1207                    reply_task.cancel()
1208                    return note_prefix + f"{target_name} on {remote_node} did not respond within 30s."
1209                finally:
1210                    self._result_futures.pop(reply_topic, None)
1211
1212            # Not found locally or remotely
1213            known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])]
1214            if known_remote:
1215                return note_prefix + (f"Agent '{target_name}' not found. "
1216                    f"Remote agents: {', '.join(known_remote)}")
1217            return note_prefix + f"Agent '{target_name}' not found."
1218
1219        # Explicit planner prefix always wins
1220        lowered = text.lower()
1221        if any(lowered.startswith(p) for p in (
1222            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1223            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1224        )):
1225            result = await self._run_planner(text)
1226            return note_prefix + (result or "Planner did not return a result. Please retry.")
1227
1228        # Single LLM call classifies intent: HA (direct action), PIPELINE (reactive rule), OTHER
1229        intent = await self._classify_intent(text)
1230        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1231
1232        if intent == "PIPELINE":
1233            result = await self._run_planner(text)
1234            return note_prefix + (result or "Planner did not return a result. Please retry.")
1235
1236        if intent == "HA":
1237            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1238            if result and isinstance(result, dict) and result.get("result"):
1239                return note_prefix + str(result["result"])
1240            if not result:
1241                return note_prefix + "I could not reach the Home Assistant agent right now. Please retry."
1242            return note_prefix + "The Home Assistant agent did not return a result. Please retry."
1243
1244        response = await self.chat(text)
1245
1246        # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once
1247        has_spawn   = "<spawn>" in response
1248        has_code    = "async def handle_task" in response or "async def setup" in response
1249        asked_spawn = any(w in text.lower() for w in ("spawn", "create", "make", "build", "add", "agent"))
1250        if has_code and not has_spawn and asked_spawn:
1251            logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it")
1252            response = await self.chat(
1253                "You wrote agent code but forgot to wrap it in a <spawn> block. "
1254                "Please output the complete spawn block now with that exact code inside it. "
1255                "Output ONLY the <spawn>...</spawn> block, nothing else."
1256            )
1257
1258        clean, spawned = await self._process_spawn_commands(response)
1259
1260        # Execute any @agent-name {payload} delegation patterns the LLM produced
1261        clean = await self._execute_llm_delegations(clean)
1262
1263        await self._mqtt_publish(
1264            f"agents/{self.actor_id}/logs",
1265            {"type": "user_interaction", "input": text[:100], "response": clean[:200]},
1266        )
1267
1268        if spawned:
1269            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1270            live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)]
1271            parts = []
1272            if live_names:
1273                replaced = '"replace": true' in response or '"replace":true' in response
1274                action   = "Replaced" if replaced else "Spawned"
1275                parts.append(f"{action} {', '.join(live_names)}")
1276            if bg_names:
1277                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1278            if parts:
1279                clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]"
1280
1281        return note_prefix + clean
async def process_user_input_stream(self, text: str):
1283    async def process_user_input_stream(self, text: str):
1284        """
1285        Streaming version of process_user_input().
1286        Yields text chunks as the LLM generates them, then a final dict:
1287          {"done": True, "spawned": [...names...], "system_msg": "..."}
1288
1289        The CLI calls this and prints chunks immediately.
1290        REST/Discord/WhatsApp should use process_user_input() instead.
1291        """
1292        # Drain monitor notifications first
1293        note_prefix = self._drain_notifications()
1294        if note_prefix:
1295            yield note_prefix
1296
1297        # All slash-commands and direct API intercepts are handled by process_user_input
1298        # Route them there to avoid duplicating all that logic here
1299        _stripped = text.strip().rstrip("()")
1300        _is_command = (
1301            _stripped.startswith("/")
1302            or _stripped in ("list_nodes", "main.list_nodes", "rules")
1303            or _stripped.startswith("@")
1304        )
1305        if _is_command:
1306            result = await self.process_user_input(text)
1307            yield result
1308            yield {"done": True, "spawned": [], "system_msg": ""}
1309            return
1310
1311        # Explicit planner prefix always wins
1312        _lowered = text.lower()
1313        if any(_lowered.startswith(p) for p in (
1314            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1315            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1316        )):
1317            result = await self._run_planner(text)
1318            yield result or "Planner did not return a result. Please retry."
1319            yield {"done": True, "spawned": [], "system_msg": ""}
1320            return
1321
1322        # Single LLM call classifies intent: HA, PIPELINE, or OTHER
1323        intent = await self._classify_intent(text)
1324        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1325
1326        if intent == "PIPELINE":
1327            result = await self._run_planner(text)
1328            yield result or "Planner did not return a result. Please retry."
1329            yield {"done": True, "spawned": [], "system_msg": ""}
1330            return
1331
1332        if intent == "HA":
1333            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1334            if result and isinstance(result, dict) and result.get("result"):
1335                yield str(result["result"])
1336            elif not result:
1337                yield "I could not reach the Home Assistant agent right now. Please retry."
1338            else:
1339                yield "The Home Assistant agent did not return a result. Please retry."
1340            yield {"done": True, "spawned": [], "system_msg": ""}
1341            return
1342
1343        # Stream the LLM response chunk by chunk
1344        full_chunks = []
1345        async for chunk in self.chat_stream(text):
1346            if isinstance(chunk, dict):
1347                break   # usage dict — discard, already tracked inside chat_stream
1348            full_chunks.append(chunk)
1349            yield chunk
1350
1351        full_response = "".join(full_chunks)
1352
1353        # Process any <spawn> blocks in the completed response
1354        _, spawned = await self._process_spawn_commands(full_response)
1355
1356        # Execute any @agent-name {payload} delegation patterns the LLM produced
1357        # If delegations ran, yield the results as an additional chunk
1358        delegated = await self._execute_llm_delegations(full_response)
1359        if delegated != full_response:
1360            # Find what changed and yield just the new parts
1361            import re as _re
1362            results = _re.findall(r'[✅❌]\s+\S+.*', delegated)
1363            if results:
1364                yield "\n" + "\n".join(results)
1365        full_response = delegated
1366
1367        system_msg = ""
1368        if spawned:
1369            names      = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder))
1370            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1371            parts = []
1372            if names:
1373                replaced = '"replace": true' in full_response or '"replace":true' in full_response
1374                parts.append(f"{'Replaced' if replaced else 'Spawned'} {names} — will auto-restore on restart")
1375            if bg_names:
1376                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1377            system_msg = " | ".join(parts)
1378
1379        await self._mqtt_publish(
1380            f"agents/{self.actor_id}/logs",
1381            {"type": "user_interaction", "input": text[:100], "response": full_response[:200]},
1382        )
1383
1384        yield {"done": True, "spawned": spawned, "system_msg": system_msg}

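Streaming version of process_user_input(). Yields text chunks as the LLM generates them, then a final dict: {"done": True, "spawned": [...], "system_msg": "..."}. On the LLM path "spawned" is the list of spawned agent objects (or install placeholders), not bare names; on the command, planner and Home Assistant paths it is empty.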

The CLI calls this and prints chunks immediately. REST/Discord/WhatsApp should use process_user_input() instead.

async def run_pipeline( self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
1877    async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
1878        """
1879        Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline.
1880        Returns the final synthesised result without blocking main's context.
1881
1882        Usage:
1883            result = await main.run_pipeline(
1884                goal="Find the Philips EP2220 manual and answer: how do I descale it?",
1885                agents=["manual-agent", "installer"]
1886            )
1887        """
1888        from .task_manager import TaskManager
1889        import uuid
1890
1891        task_id = uuid.uuid4().hex[:8]
1892        future  = asyncio.get_event_loop().create_future()
1893        self._result_futures[task_id] = future
1894
1895        mgr = await self.spawn(
1896            TaskManager,
1897            goal=goal,
1898            available_agents=agents,
1899            llm_provider=self.llm,
1900            reply_to_id=self.actor_id,
1901            reply_task_id=task_id,
1902            auto_destroy=True,
1903            force_replan=force_replan,
1904            cache_dir=str(self._persistence_dir.parent / "plan_cache"),
1905            persistence_dir=str(self._persistence_dir.parent),
1906        )
1907
1908        logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}")
1909
1910        try:
1911            result = await asyncio.wait_for(future, timeout=timeout)
1912            return result
1913        except asyncio.TimeoutError:
1914            logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s")
1915            return {"error": f"Pipeline timed out after {timeout}s"}
1916        finally:
1917            self._result_futures.pop(task_id, None)

Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline. Returns the final synthesised result without blocking main's context.

Usage: result = await main.run_pipeline( goal="Find the Philips EP2220 manual and answer: how do I descale it?", agents=["manual-agent", "installer"] )

def list_nodes(self) -> list[dict]:
1985    def list_nodes(self) -> list[dict]:
1986        """Return all known remote nodes with their last-seen time and running agents."""
1987        import time as _time
1988        now = _time.time()
1989        return [
1990            {
1991                "node":      name,
1992                "agents":    info.get("agents", []),
1993                "last_seen": info.get("last_seen", 0),
1994                "online":    (now - info.get("last_seen", 0)) < 30,
1995            }
1996            for name, info in self._known_nodes.items()
1997        ]

Return all known remote nodes with their last-seen time and running agents.

def list_topics(self, keyword: str = '') -> list[dict]:
1999    def list_topics(self, keyword: str = "") -> list[dict]:
2000        """
2001        Return all known MQTT topics published by agents, optionally filtered by keyword.
2002        Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}
2003
2004        Example:
2005            list_topics("cpu")     → topics containing "cpu"
2006            list_topics("temp")    → topics containing "temp"
2007            list_topics()          → all topics
2008        """
2009        results = []
2010        kw = keyword.lower()
2011        for topic, manifests in self._topic_registry.items():
2012            if kw and kw not in topic.lower():
2013                continue
2014            results.append({
2015                "topic":   topic,
2016                "agents":  [{"name": m.get("name"), "node": m.get("node"),
2017                             "description": m.get("description", "")} for m in manifests],
2018            })
2019        return sorted(results, key=lambda x: x["topic"])

Return all known MQTT topics published by agents, optionally filtered by keyword. Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}

Example: list_topics("cpu") → topics containing "cpu" list_topics("temp") → topics containing "temp" list_topics() → all topics

def list_capabilities(self, keyword: str = '') -> list[dict]:
2021    def list_capabilities(self, keyword: str = "") -> list[dict]:
2022        """
2023        Return all known agents with their full capability profile:
2024        name, description, capabilities, input_schema, output_schema.
2025
2026        Example:
2027            list_capabilities()            → all agents
2028            list_capabilities("weather")   → agents with "weather" in description/capabilities
2029        """
2030        results = []
2031        kw = keyword.lower().strip()
2032        # Support multi-word keywords — match if ANY word appears in the haystack
2033        kw_words = kw.split() if kw else []
2034        for name, manifest in self._agent_manifests.items():
2035            desc  = manifest.get("description", "")
2036            caps  = manifest.get("capabilities", [])
2037            # Filter by keyword across description, capabilities, and name
2038            if kw_words:
2039                haystack = desc.lower() + " " + " ".join(caps).lower() + " " + name.lower()
2040                if not any(w in haystack for w in kw_words):
2041                    continue
2042            results.append({
2043                "name":          name,
2044                "node":          manifest.get("node"),
2045                "description":   desc,
2046                "capabilities":  caps,
2047                "input_schema":  manifest.get("input_schema",  {}),
2048                "output_schema": manifest.get("output_schema", {}),
2049                "spawnable":     manifest.get("spawnable", False),
2050                "running":       bool(self._registry and self._registry.find_by_name(name)),
2051            })
2052        return sorted(results, key=lambda x: x["name"])

Return all known agents with their full capability profile: name, description, capabilities, input_schema, output_schema.

Example: list_capabilities() → all agents list_capabilities("weather") → agents with "weather" in description/capabilities

async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
2101    async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
2102        """
2103        Move a running agent to a different node.
2104
2105        If the agent is local: saves updated config (with new node) and re-spawns remotely.
2106        If the agent is remote: publishes a migrate command to its current node.
2107        Returns {"success": bool, "message": str}
2108        """
2109        import time as _time
2110
2111        reg = self._get_spawn_registry()
2112        config = reg.get(agent_name)
2113        if not config:
2114            return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."}
2115
2116        current_node = config.get("node", "").strip()
2117
2118        if current_node == target_node:
2119            return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."}
2120
2121        if current_node:
2122            # ── Remote → Remote migration ────────────────────────────────────
2123            logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'")
2124            await self._mqtt_publish(
2125                f"nodes/{current_node}/migrate",
2126                {"name": agent_name, "target_node": target_node},
2127            )
2128        else:
2129            # ── Local → Remote migration ─────────────────────────────────────
2130            logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'")
2131
2132            # Stop the local instance
2133            if self._registry:
2134                local = self._registry.find_by_name(agent_name)
2135                if local:
2136                    try:
2137                        await self._registry.unregister(local.actor_id)
2138                        await local.stop()
2139                        await asyncio.sleep(0.3)
2140                    except Exception as e:
2141                        logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}")
2142
2143            # Update config with new node target and re-spawn remotely
2144            new_config = dict(config)
2145            new_config["node"] = target_node
2146            new_config.pop("replace", None)
2147
2148            await self._spawn_remote(new_config, target_node, save=True)
2149
2150        # Update spawn registry so next restart re-spawns to the right node
2151        updated = dict(config)
2152        updated["node"] = target_node
2153        self._save_to_spawn_registry(updated)
2154
2155        msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' "
2156               f"→ '{target_node}'. It will appear in the dashboard shortly.")
2157        logger.info(f"[{self.name}] {msg}")
2158        return {"success": True, "message": msg}

Move a running agent to a different node.

If the agent is local: saves updated config (with new node) and re-spawns remotely. If the agent is remote: publishes a migrate command to its current node. Returns {"success": bool, "message": str}

async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
2221    async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
2222        """
2223        Send a task to the installer agent and wait for the result.
2224        Handles node_deploy, node_install, node_run, install, check actions.
2225        timeout is generous (300s) because deploys involve SSH + pip installs.
2226        """
2227        if not self._registry:
2228            return {"error": "No registry available"}
2229        installer = self._registry.find_by_name("installer")
2230        if not installer:
2231            return {"error": "installer agent not found"}
2232
2233        import uuid as _uuid
2234        task_id = f"inst_{_uuid.uuid4().hex[:8]}"
2235        future: asyncio.Future = asyncio.get_event_loop().create_future()
2236        self._result_futures[task_id] = future
2237
2238        payload = dict(payload)
2239        payload["_task_id"] = task_id
2240        payload["task"]     = task_id
2241
2242        await self.send(installer.actor_id, MessageType.TASK, payload)
2243        try:
2244            return await asyncio.wait_for(future, timeout=timeout)
2245        except asyncio.TimeoutError:
2246            return {"error": f"Installer timed out after {timeout}s"}
2247        finally:
2248            self._result_futures.pop(task_id, None)

Send a task to the installer agent and wait for the result. Handles node_deploy, node_install, node_run, install, check actions. timeout is generous (300s) because deploys involve SSH + pip installs.

async def delegate_task(self, target_name: str, task: Any, timeout: float = 60.0):
316    async def _delegate_task_with_normalized_key(self, target_name: str, task: Any, timeout: float = 60.0):
317        if not self._registry:
318            return None
319        target = self._registry.find_by_name(target_name)
320        if not target:
321            return None
322
323        task_key = _normalize_delegate_task_key(task)
324        future = asyncio.get_event_loop().create_future()
325        self._result_futures[task_key] = future
326        await self.send(
327            target.actor_id,
328            MessageType.TASK,
329            {"text": task, "task": task_key, "reply_to": self.actor_id},
330        )
331        try:
332            return await asyncio.wait_for(future, timeout=timeout)
333        except asyncio.TimeoutError:
334            return None
335        finally:
336            self._result_futures.pop(task_key, None)

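Send `task` to the agent registered as `target_name` and wait up to `timeout` seconds for its reply. Returns None when there is no registry, no such agent, or the wait times out.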

async def list_agents(self) -> list[dict]:
2266    async def list_agents(self) -> list[dict]:
2267        if not self._registry:
2268            return []
2269        return [a.get_status() for a in self._registry.all_actors()]
async def send_command(self, target_name: str, command: MessageType):
2271    async def send_command(self, target_name: str, command: MessageType):
2272        if not self._registry:
2273            return
2274        target = self._registry.find_by_name(target_name)
2275        if target:
2276            await self.send(target.actor_id, command)
async def delete_spawned_agent(self, name: str):
2278    async def delete_spawned_agent(self, name: str):
2279        # Find node before removing from registry
2280        reg = self._get_spawn_registry()
2281        node = reg.get(name, {}).get("node", "").strip()
2282
2283        self._remove_from_spawn_registry(name)
2284
2285        # Update desired state so Pi doesn't re-spawn on reconcile
2286        if node:
2287            await self._update_node_desired_state(node, remove_name=name)
2288            await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1)
2289
2290        if self._registry:
2291            target = self._registry.find_by_name(name)
2292            if target:
2293                await self._registry.unregister(target.actor_id)
2294                await target.stop()
class MonitorActor(wactorz.Actor):
 29class MonitorActor(Actor):
 30
 31    def __init__(
 32        self,
 33        check_interval:    float = 15.0,
 34        heartbeat_timeout: float = 60.0,
 35        auto_restart:      bool  = False,
 36        **kwargs,
 37    ):
 38        kwargs.setdefault("name", "monitor")
 39        super().__init__(**kwargs)
 40        self.check_interval    = check_interval
 41        self.heartbeat_timeout = heartbeat_timeout
 42        self.auto_restart      = auto_restart
 43        self.protected         = True
 44
 45        self._last_seen:      dict[str, float] = {}
 46        self._alert_state:    dict[str, bool]  = {}
 47
 48        # Error event registry: actor_id → latest error event dict
 49        self._error_registry: dict[str, dict]  = {}
 50        # Cooldown: actor_id → last time we notified main about it
 51        self._last_notified:  dict[str, float] = {}
 52        # Track which actors we've attempted to restart this session
 53        self._restart_attempts: dict[str, int] = {}
 54
 55    async def on_start(self):
 56        if self._registry:
 57            now = time.time()
 58            for actor in self._registry.all_actors():
 59                if actor.actor_id != self.actor_id:
 60                    self._last_seen[actor.actor_id] = now
 61
 62        self._tasks.append(asyncio.create_task(self._monitor_loop()))
 63        logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")
 64
 65    # ── Message handling ───────────────────────────────────────────────────
 66
 67    async def handle_message(self, msg: Message):
 68        # Heartbeat — any message counts as alive
 69        if msg.sender_id and msg.sender_id != self.actor_id:
 70            self._last_seen[msg.sender_id] = time.time()
 71            if self._alert_state.get(msg.sender_id):
 72                logger.info(f"[{self.name}] Actor {msg.sender_id[:8]} recovered.")
 73                self._alert_state[msg.sender_id] = False
 74
 75        # Structured error event from agents/{id}/errors (routed via MQTT bridge)
 76        if msg.type == MessageType.TASK and isinstance(msg.payload, dict):
 77            if msg.payload.get("_monitor_error_event"):
 78                await self._handle_error_event(msg.payload)
 79
 80    # ── Monitor loop ───────────────────────────────────────────────────────
 81
 82    async def _monitor_loop(self):
 83        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
 84            try:
 85                await asyncio.sleep(self.check_interval)
 86                await self._ping_all_actors()
 87                await self._check_all_actors()
 88                await self._check_error_registry()
 89                await self._publish_system_health()
 90            except asyncio.CancelledError:
 91                break
 92            except Exception as e:
 93                logger.error(f"[{self.name}] Monitor loop error: {e}")
 94
 95    async def _ping_all_actors(self):
 96        if not self._registry:
 97            return
 98        for actor in self._registry.all_actors():
 99            if actor.actor_id != self.actor_id:
100                try:
101                    await self.send(actor.actor_id, MessageType.STATUS_REQUEST, None)
102                except Exception:
103                    pass
104
105    async def _check_all_actors(self):
106        if not self._registry:
107            return
108        now = time.time()
109        for actor in self._registry.all_actors():
110            if actor.actor_id == self.actor_id:
111                continue
112            if actor.actor_id not in self._last_seen:
113                self._last_seen[actor.actor_id] = now
114                continue
115            if actor.state == ActorState.RUNNING:
116                start_age = now - (actor.metrics.start_time or now)
117                if start_age < self.heartbeat_timeout:
118                    self._last_seen[actor.actor_id] = max(
119                        self._last_seen[actor.actor_id], now - start_age
120                    )
121            # Heartbeat fires every 10s — use as secondary liveness signal
122            hb = getattr(actor.metrics, "last_heartbeat", None)
123            if hb and hb > self._last_seen.get(actor.actor_id, 0):
124                self._last_seen[actor.actor_id] = hb
125
126            gap = now - self._last_seen[actor.actor_id]
127            if gap > self.heartbeat_timeout and actor.state == ActorState.RUNNING:
128                if not self._alert_state.get(actor.actor_id):
129                    self._alert_state[actor.actor_id] = True
130                    await self._fire_heartbeat_alert(actor, gap)
131                    if self.auto_restart:
132                        await self._attempt_restart(actor, reason="heartbeat timeout")
133            else:
134                if self._alert_state.get(actor.actor_id) and gap <= self.heartbeat_timeout:
135                    self._alert_state[actor.actor_id] = False
136
137    # ── Error event handling ───────────────────────────────────────────────
138
139    async def _handle_error_event(self, event: dict):
140        """
141        Called when an agent publishes a structured error.
142        Decides: log / restart / escalate to user.
143        """
144        actor_id = event.get("actor_id", "")
145        name     = event.get("name", actor_id[:8])
146        phase    = event.get("phase", "unknown")
147        error    = event.get("error", "")
148        severity = event.get("severity", "warning")
149        fatal    = event.get("fatal", False)
150        degraded = event.get("degraded", False)
151        consec   = event.get("consecutive", 1)
152
153        # Store in registry for health checks
154        self._error_registry[actor_id] = event
155
156        logger.warning(
157            f"[{self.name}] Error event from '{name}': "
158            f"phase={phase} severity={severity} consecutive={consec}"
159        )
160
161        # ── Recovery decision ──────────────────────────────────────────────
162        if fatal:
163            # Bad code / setup failure — restart won't help without a fix
164            msg = (
165                f"**{name}** failed during *{phase}* and cannot run: `{error}`. "
166                f"The agent needs its code fixed before it can be used."
167            )
168            await self._notify_main(actor_id, name, msg, severity="critical")
169            await self._fire_error_alert(event)
170
171        elif severity == "critical" or degraded:
172            # Repeated runtime errors — try a restart
173            actor = self._find_actor(actor_id)
174            if actor and self._restart_attempts.get(actor_id, 0) < 3:
175                self._restart_attempts[actor_id] = self._restart_attempts.get(actor_id, 0) + 1
176                restarted = await self._attempt_restart(actor, reason=f"{phase} error (attempt {self._restart_attempts[actor_id]})")
177                if restarted:
178                    msg = (
179                        f"**{name}** kept crashing in *{phase}* ({consec}x), "
180                        f"so I restarted it. Latest error: `{error}`."
181                    )
182                else:
183                    msg = (
184                        f"**{name}** is crashing repeatedly in *{phase}* "
185                        f"and I couldn't restart it. Error: `{error}`."
186                    )
187            else:
188                attempts = self._restart_attempts.get(actor_id, 0)
189                msg = (
190                    f"**{name}** has failed {consec} times in *{phase}* "
191                    f"(restart attempted {attempts}x). Error: `{error}`. "
192                    f"It may need its code fixed."
193                )
194            await self._notify_main(actor_id, name, msg, severity="critical")
195            await self._fire_error_alert(event)
196
197        else:
198            # Single warning — log and let agent recover on its own
199            await self._fire_error_alert(event)
200
201    async def _check_error_registry(self):
202        """Periodically re-notify main about persistently degraded agents."""
203        now = time.time()
204        for actor_id, event in list(self._error_registry.items()):
205            last = self._last_notified.get(actor_id, 0)
206            if event.get("degraded") and (now - last) > _NOTIFY_COOLDOWN:
207                actor = self._find_actor(actor_id)
208                name  = event.get("name", actor_id[:8])
209                # If agent has recovered (error count reset), clean up registry
210                if actor and hasattr(actor, "_consecutive_errors") and actor._consecutive_errors == 0:
211                    del self._error_registry[actor_id]
212                    await self._notify_main(
213                        actor_id, name,
214                        f"**{name}** has recovered and is running normally again. ✅",
215                        severity="info",
216                    )
217
218    # ── User notification ──────────────────────────────────────────────────
219
220    async def _notify_main(
221        self,
222        actor_id: str,
223        agent_name: str,
224        message: str,
225        severity: str = "warning",
226    ):
227        """
228        Send a structured notification to MainActor so it can relay to the user
229        in natural language during their next interaction (or immediately if idle).
230        """
231        now = time.time()
232        cooldown = self._last_notified.get(actor_id, 0)
233        if (now - cooldown) < _NOTIFY_COOLDOWN and severity != "info":
234            return   # Don't spam
235
236        self._last_notified[actor_id] = now
237
238        if not self._registry:
239            return
240        main = self._registry.find_by_name("main")
241        if not main:
242            return
243
244        try:
245            await self.send(main.actor_id, MessageType.TASK, {
246                "_monitor_notification": True,
247                "agent_name":  agent_name,
248                "message":     message,
249                "severity":    severity,
250                "timestamp":   now,
251            })
252            logger.info(f"[{self.name}] Notified main about '{agent_name}': {message[:80]}")
253        except Exception as e:
254            logger.error(f"[{self.name}] Failed to notify main: {e}")
255
256    # ── Alerting ───────────────────────────────────────────────────────────
257
258    async def _fire_heartbeat_alert(self, actor: Actor, gap: float):
259        alert = {
260            "actor_id":      actor.actor_id,
261            "name":          actor.name,
262            "last_seen_ago": gap,
263            "state":         actor.state.value,
264            "timestamp":     time.time(),
265            "severity":      "warning" if gap < 120 else "critical",
266        }
267        logger.warning(f"[{self.name}] ALERT: {actor.name} unresponsive for {gap:.0f}s")
268        await self._mqtt_publish(f"agents/{actor.actor_id}/alert", alert)
269
270        # Notify main only for user-spawned agents
271        _infra = {"monitor", "installer", "main", "code-agent",
272                  "anomaly-detector", "home-assistant-agent"}
273        if actor.name not in _infra:
274            await self._notify_main(
275                actor.actor_id,
276                actor.name,
277                f"**{actor.name}** has been unresponsive for {gap:.0f}s.",
278                severity="warning",
279            )
280
281    async def _fire_error_alert(self, event: dict):
282        await self._mqtt_publish(
283            f"agents/{event.get('actor_id', 'unknown')}/alert",
284            {
285                "actor_id":  event.get("actor_id"),
286                "name":      event.get("name"),
287                "message":   f"[{event.get('phase')}] {event.get('error')}",
288                "severity":  event.get("severity", "warning"),
289                "timestamp": time.time(),
290            },
291        )
292
293    # ── Restart ────────────────────────────────────────────────────────────
294
295    async def _attempt_restart(self, actor: Actor, reason: str = "") -> bool:
296        logger.info(f"[{self.name}] Restarting '{actor.name}' — reason: {reason}")
297        try:
298            if actor.state != ActorState.STOPPED:
299                await actor.stop()
300                await asyncio.sleep(0.5)
301            await actor.start()
302            self._last_seen[actor.actor_id] = time.time()
303            logger.info(f"[{self.name}] '{actor.name}' restarted successfully.")
304            return True
305        except Exception as e:
306            logger.error(f"[{self.name}] Restart of '{actor.name}' failed: {e}")
307            return False
308
309    # ── Helpers ────────────────────────────────────────────────────────────
310
311    def _find_actor(self, actor_id: str) -> Optional[Actor]:
312        if not self._registry:
313            return None
314        for a in self._registry.all_actors():
315            if a.actor_id == actor_id:
316                return a
317        return None
318
319    async def _publish_system_health(self):
320        if not self._registry:
321            return
322        now    = time.time()
323        actors = self._registry.all_actors()
324        health = {
325            "timestamp":    now,
326            "total_actors": len(actors),
327            "running":  sum(1 for a in actors if a.state == ActorState.RUNNING),
328            "stopped":  sum(1 for a in actors if a.state == ActorState.STOPPED),
329            "failed":   sum(1 for a in actors if a.state == ActorState.FAILED),
330            "degraded": len(self._error_registry),
331            "actors": [
332                {
333                    "id":            a.actor_id,
334                    "name":          a.name,
335                    "state":         a.state.value,
336                    "last_seen_ago": now - self._last_seen.get(a.actor_id, now),
337                    "consecutive_errors": getattr(a, "_consecutive_errors", 0),
338                    "error_phase":        getattr(a, "_error_phase", ""),
339                }
340                for a in actors
341            ],
342        }
343        await self._mqtt_publish("system/health", health)

Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.

MonitorActor( check_interval: float = 15.0, heartbeat_timeout: float = 60.0, auto_restart: bool = False, **kwargs)
31    def __init__(
32        self,
33        check_interval:    float = 15.0,
34        heartbeat_timeout: float = 60.0,
35        auto_restart:      bool  = False,
36        **kwargs,
37    ):
38        kwargs.setdefault("name", "monitor")
39        super().__init__(**kwargs)
40        self.check_interval    = check_interval
41        self.heartbeat_timeout = heartbeat_timeout
42        self.auto_restart      = auto_restart
43        self.protected         = True
44
45        self._last_seen:      dict[str, float] = {}
46        self._alert_state:    dict[str, bool]  = {}
47
48        # Error event registry: actor_id → latest error event dict
49        self._error_registry: dict[str, dict]  = {}
50        # Cooldown: actor_id → last time we notified main about it
51        self._last_notified:  dict[str, float] = {}
52        # Track which actors we've attempted to restart this session
53        self._restart_attempts: dict[str, int] = {}
check_interval
heartbeat_timeout
auto_restart
protected
async def on_start(self):
55    async def on_start(self):
56        if self._registry:
57            now = time.time()
58            for actor in self._registry.all_actors():
59                if actor.actor_id != self.actor_id:
60                    self._last_seen[actor.actor_id] = now
61
62        self._tasks.append(asyncio.create_task(self._monitor_loop()))
63        logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
67    async def handle_message(self, msg: Message):
68        # Heartbeat — any message counts as alive
69        if msg.sender_id and msg.sender_id != self.actor_id:
70            self._last_seen[msg.sender_id] = time.time()
71            if self._alert_state.get(msg.sender_id):
72                logger.info(f"[{self.name}] Actor {msg.sender_id[:8]} recovered.")
73                self._alert_state[msg.sender_id] = False
74
75        # Structured error event from agents/{id}/errors (routed via MQTT bridge)
76        if msg.type == MessageType.TASK and isinstance(msg.payload, dict):
77            if msg.payload.get("_monitor_error_event"):
78                await self._handle_error_event(msg.payload)

Handle messages not caught by default handlers.

CodeAgent
class ManualAgent(wactorz.Actor):
 37class ManualAgent(Actor):
 38    """
 39    Pre-defined agent that finds, downloads, and answers questions from device manuals.
 40    Requires: httpx  (+ pdfplumber or pymupdf for PDF extraction)
 41    """
 42
 43    def __init__(self, llm_provider=None, **kwargs):
 44        kwargs.setdefault("name", "manual-agent")
 45        super().__init__(**kwargs)
 46        self.llm              = llm_provider
 47        self._manual_text:    Optional[str]  = None
 48        self._manual_device:  Optional[str]  = None
 49        self._manual_url:     Optional[str]  = None
 50        self._manual_pages:   int            = 0
 51
 52    def _current_task_description(self) -> str:
 53        if self._manual_device:
 54            return f"loaded: {self._manual_device}"
 55        return "idle — no manual loaded"
 56
 57    async def on_start(self):
 58        await self._mqtt_publish(
 59            f"agents/{self.actor_id}/logs",
 60            {"type": "log", "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.", "timestamp": time.time()},
 61        )
 62        logger.info(f"[{self.name}] Ready.")
 63
 64    # ── Direct chat() entry point (used by CLIInterface) ───────────────────
 65
 66    async def chat(self, message: str) -> str:
 67        """
 68        Synchronous-style entry point for CLIInterface and other direct callers.
 69        Parses the message as JSON payload or plain-text question, executes the
 70        action, and returns a human-readable string response.
 71        """
 72        payload = None
 73        stripped = message.strip()
 74        if stripped.startswith("{"):
 75            try:
 76                payload = json.loads(stripped)
 77            except json.JSONDecodeError:
 78                pass
 79
 80        if payload and isinstance(payload, dict):
 81            result = await self._handle_task_payload(payload)
 82        else:
 83            if self._manual_text:
 84                result = await self._ask(stripped)
 85            else:
 86                result = {
 87                    "error": "No manual loaded yet.",
 88                    "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
 89                }
 90
 91        return self._format_result(result)
 92
 93    def _format_result(self, result: dict) -> str:
 94        """Turn a result dict into a readable string for chat output."""
 95        if "error" in result:
 96            msg = result["error"]
 97            hint = result.get("hint", "")
 98            return f"[error] {msg}\n{hint}".strip()
 99
100        if "answer" in result:
101            return result["answer"]
102
103        if result.get("success"):
104            return (
105                f"Manual loaded: {result.get('device', '?')}\n"
106                f"  URL:   {result.get('url', '?')}\n"
107                f"  Pages: {result.get('pages', '?')}\n"
108                f"  Chars: {result.get('chars', '?'):,}\n"
109                f"  Preview: {result.get('preview', '')[:200]}"
110            )
111
112        if "status" in result:
113            if result["status"] == "cleared":
114                return "Manual cleared."
115            if result["status"] == "loaded":
116                return (
117                    f"Loaded: {result.get('device', '?')} "
118                    f"({result.get('pages', '?')} pages, {result.get('chars', '?'):,} chars)"
119                )
120            return result.get("message", str(result))
121
122        return str(result)
123
124    # ── Message-based entry point (actor mailbox) ──────────────────────────
125
126    async def handle_message(self, msg: Message):
127        if msg.type == MessageType.TASK:
128            try:
129                result = await self._handle_task(msg)
130            except Exception as e:
131                logger.error(f"[{self.name}] Task handling failed: {e}", exc_info=True)
132                result = {"error": f"Internal error: {e}"}
133
134            target = msg.reply_to or msg.sender_id
135            if target:
136                await self.send(target, MessageType.RESULT, result)
137            else:
138                logger.warning(
139                    f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
140                    f"sender_id={msg.sender_id!r}). Result discarded: {result}"
141                )
142
143    async def _handle_task(self, msg: Message) -> dict:
144        payload = msg.payload if isinstance(msg.payload, dict) else {}
145        if not isinstance(msg.payload, dict):
146            text = str(msg.payload).strip()
147            if text:
148                return await self._ask(text)
149            return {"error": "Send a dict payload with 'action' key"}
150
151        return await self._handle_task_payload(payload)
152
153    async def _handle_task_payload(self, payload: dict) -> dict:
154        """Core task dispatcher — shared by both chat() and handle_message()."""
155        action = payload.get("action", "").lower()
156
157        if action == "load_manual":
158            device = payload.get("device") or payload.get("query", "")
159            if not device:
160                return {"error": "Missing 'device' field"}
161            return await self._load_manual(device)
162
163        if action == "ask":
164            question = payload.get("question") or payload.get("query") or payload.get("text", "")
165            if not question:
166                return {"error": "Missing 'question' field"}
167            return await self._ask(question)
168
169        if action == "status":
170            return self._status()
171
172        if action == "clear":
173            self._manual_text   = None
174            self._manual_device = None
175            self._manual_url    = None
176            self._manual_pages  = 0
177            return {"status": "cleared"}
178
179        if "question" in payload or "query" in payload:
180            return await self._ask(payload.get("question") or payload.get("query", ""))
181
182        return {
183            "error": f"Unknown action: '{action}'",
184            "supported": ["load_manual", "ask", "status", "clear"],
185        }
186
187    # ── Load manual ────────────────────────────────────────────────────────
188
189    async def _load_manual(self, device: str) -> dict:
190        await self._log(f"Searching for manual: {device}")
191
192        loop    = asyncio.get_event_loop()
193        pdf_url = await loop.run_in_executor(None, lambda: self._search_for_manual(device))
194
195        if not pdf_url:
196            await self._alert(f"No PDF manual found for: {device}", "warning")
197            return {"error": f"Could not find a PDF manual for: {device}"}
198
199        await self._log(f"Found: {pdf_url}")
200
201        pdf_bytes = await self._download_pdf(pdf_url)
202        if not pdf_bytes:
203            return {"error": f"Failed to download PDF from: {pdf_url}"}
204
205        size_kb = len(pdf_bytes) // 1024
206        await self._log(f"Downloaded {size_kb} KB — extracting text...")
207
208        text, pages = await loop.run_in_executor(None, lambda: self._extract_text(pdf_bytes))
209        if not text:
210            return {"error": "PDF has no extractable text (may be a scanned image PDF)."}
211
212        self._manual_text   = text
213        self._manual_device = device
214        self._manual_url    = pdf_url
215        self._manual_pages  = pages
216
217        await self._log(f"Manual loaded: {device}{pages} pages, {len(text):,} chars")
218        await self._publish_status()
219
220        return {
221            "success": True,
222            "device":  device,
223            "url":     pdf_url,
224            "pages":   pages,
225            "chars":   len(text),
226            "preview": text[:300].replace("\n", " ").strip(),
227        }
228
229    # ── Search ─────────────────────────────────────────────────────────────
230
231    def _search_for_manual(self, device: str) -> Optional[str]:
232        try:
233            import httpx
234        except ImportError:
235            logger.error(f"[{self.name}] httpx is not installed — cannot search for manuals")
236            return None
237
238        headers = {
239            "User-Agent":      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
240            "Accept-Language": "en-US,en;q=0.9",
241            "Accept":          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
242        }
243
244        # ── Pass 1: direct Philips document server (model number pattern) ──
245        model_m = re.search(r'EP\d{4}', device, re.IGNORECASE)
246        if model_m:
247            model = model_m.group(0).upper()
248            ml    = model.lower()
249            direct_urls = [
250                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_pss_aenghk.pdf",
251                f"https://www.download.p4c.philips.com/files/e/{ml}_31/{ml}_31_pss_aenghk.pdf",
252                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_user_manual_en.pdf",
253                f"https://www.documents.philips.com/doclib/enc/fetch/2000/4504/261257/261271/User_Manual_{model}.pdf",
254            ]
255            try:
256                with httpx.Client(follow_redirects=True, timeout=10, headers=headers) as client:
257                    for url in direct_urls:
258                        try:
259                            r = client.head(url)
260                            ct = r.headers.get("content-type", "")
261                            if r.status_code == 200 and ("pdf" in ct or url.endswith(".pdf")):
262                                logger.info(f"[{self.name}] Direct URL works: {url}")
263                                return url
264                        except Exception as e:
265                            logger.debug(f"[{self.name}] Direct URL failed ({url}): {e}")
266                            continue
267            except Exception as e:
268                logger.warning(f"[{self.name}] Philips direct check failed: {e}")
269
270        # ── Pass 2: DDGS search ────────────────────────────────────────────
271        result = self._search_ddgs(device)
272        if result:
273            return result
274
275        # ── Pass 3: Bing scrape (with redirect URL decoding) ───────────────
276        result = self._search_bing_scrape(device, headers)
277        if result:
278            return result
279
280        # ── Pass 4: Google scrape fallback ─────────────────────────────────
281        result = self._search_google_scrape(device, headers)
282        if result:
283            return result
284
285        logger.warning(f"[{self.name}] All search passes exhausted — no manual found for: {device}")
286        return None
287
288    # ── Pass 2: DDGS ──────────────────────────────────────────────────────
289
290    def _search_ddgs(self, device: str) -> Optional[str]:
291        queries = [
292            f"{device} user manual filetype:pdf",
293            f"{device} user manual PDF manualslib OR manualzz",
294            f"{device} owner manual PDF download",
295        ]
296
297        def get_url(r):
298            return r.get("href") or r.get("url") or r.get("link") or ""
299
300        try:
301            try:
302                from ddgs import DDGS
303                logger.info(f"[{self.name}] Pass 2: using ddgs package")
304            except ImportError:
305                from duckduckgo_search import DDGS
306                logger.info(f"[{self.name}] Pass 2: using duckduckgo_search (deprecated)")
307
308            with DDGS() as ddgs:
309                for query in queries:
310                    try:
311                        results = list(ddgs.text(query, max_results=15))
312                        logger.info(f"[{self.name}] Pass 2 query: {query!r}{len(results)} results")
313
314                        for i, r in enumerate(results[:5]):
315                            logger.info(
316                                f"[{self.name}]   [{i}] url={get_url(r)!r} "
317                                f"title={r.get('title', '')[:60]!r}"
318                            )
319
320                        match = self._pick_best_url(results, get_url)
321                        if match:
322                            logger.info(f"[{self.name}] Pass 2 HIT: {match}")
323                            return match
324
325                    except Exception as e:
326                        logger.warning(f"[{self.name}] DDGS query failed ({query}): {e}")
327                        continue
328        except ImportError:
329            logger.warning(f"[{self.name}] Neither ddgs nor duckduckgo_search installed — skipping")
330
331        return None
332
333    # ── Pass 3: Bing scrape ───────────────────────────────────────────────
334
335    def _search_bing_scrape(self, device: str, headers: dict) -> Optional[str]:
336        import httpx
337
338        queries = [
339            f"{device} user manual PDF",
340            f"{device} manual PDF manualslib OR manualzz",
341        ]
342
343        try:
344            with httpx.Client(follow_redirects=True, timeout=15, headers=headers) as client:
345                for query in queries:
346                    try:
347                        url  = "https://www.bing.com/search?q=" + urllib.parse.quote(query)
348                        r    = client.get(url)
349                        urls = self._extract_bing_urls(r.text)
350
351                        logger.info(f"[{self.name}] Pass 3 query: {query!r}{len(urls)} real URLs")
352                        for i, u in enumerate(urls[:10]):
353                            logger.info(f"[{self.name}]   [{i}] {u}")
354
355                        # Build fake result dicts so we can reuse _pick_best_url
356                        results = [{"href": u, "title": "", "body": ""} for u in urls]
357                        match   = self._pick_best_url(results, lambda r: r["href"])
358                        if match:
359                            logger.info(f"[{self.name}] Pass 3 HIT: {match}")
360                            return match
361
362                    except Exception as e:
363                        logger.warning(f"[{self.name}] Bing query failed ({query}): {e}")
364                        continue
365        except Exception as e:
366            logger.warning(f"[{self.name}] Bing scrape failed entirely: {e}")
367
368        return None
369
370    # ── Pass 4: Google scrape ─────────────────────────────────────────────
371
372    def _search_google_scrape(self, device: str, headers: dict) -> Optional[str]:
373        import httpx
374
375        queries = [
376            f"{device} user manual PDF",
377            f"{device} manual filetype:pdf",
378        ]
379
380        try:
381            with httpx.Client(follow_redirects=True, timeout=15, headers=headers) as client:
382                for query in queries:
383                    try:
384                        url = "https://www.google.com/search?q=" + urllib.parse.quote(query)
385                        r   = client.get(url)
386                        urls = self._extract_google_urls(r.text)
387
388                        logger.info(f"[{self.name}] Pass 4 query: {query!r}{len(urls)} real URLs")
389                        for i, u in enumerate(urls[:10]):
390                            logger.info(f"[{self.name}]   [{i}] {u}")
391
392                        results = [{"href": u, "title": "", "body": ""} for u in urls]
393                        match   = self._pick_best_url(results, lambda r: r["href"])
394                        if match:
395                            logger.info(f"[{self.name}] Pass 4 HIT: {match}")
396                            return match
397
398                    except Exception as e:
399                        logger.warning(f"[{self.name}] Google query failed ({query}): {e}")
400                        continue
401        except Exception as e:
402            logger.warning(f"[{self.name}] Google scrape failed entirely: {e}")
403
404        return None
405
406    # ── URL extraction helpers ─────────────────────────────────────────────
407
408    @staticmethod
409    def _extract_bing_urls(html: str) -> list[str]:
410        """
411        Extract real destination URLs from Bing search results HTML.
412        Bing wraps links as /ck/a?...&u=a1<base64url>...  — we decode those.
413        Also picks up any direct href links that aren't bing/microsoft.
414        """
415        urls = []
416        seen = set()
417
418        # Method 1: decode Bing redirect URLs  (/ck/a?...u=a1<base64>...)
419        for m in re.finditer(r'href="https?://www\.bing\.com/ck/a\?[^"]*?u=a1([A-Za-z0-9_-]+)[^"]*"', html):
420            try:
421                encoded = m.group(1)
422                # Fix base64url padding
423                padded  = encoded + "=" * (4 - len(encoded) % 4)
424                decoded = base64.urlsafe_b64decode(padded).decode("utf-8", errors="ignore")
425                if decoded.startswith("http") and decoded not in seen:
426                    seen.add(decoded)
427                    urls.append(decoded)
428            except Exception:
429                continue
430
431        # Method 2: direct hrefs that aren't search engine domains
432        for m in re.finditer(r'href=["\'](https?://[^"\'<>\s]+)', html):
433            link = m.group(1)
434            if not any(d in link for d in _SEARCH_ENGINE_DOMAINS) and link not in seen:
435                seen.add(link)
436                urls.append(link)
437
438        return urls
439
440    @staticmethod
441    def _extract_google_urls(html: str) -> list[str]:
442        """
443        Extract real destination URLs from Google search results HTML.
444        Google wraps links as /url?q=<url>&... — we extract the q parameter.
445        """
446        urls = []
447        seen = set()
448
449        # Method 1: Google redirect links
450        for m in re.finditer(r'/url\?q=(https?://[^&"]+)', html):
451            try:
452                decoded = urllib.parse.unquote(m.group(1))
453                if not any(d in decoded for d in _SEARCH_ENGINE_DOMAINS) and decoded not in seen:
454                    seen.add(decoded)
455                    urls.append(decoded)
456            except Exception:
457                continue
458
459        # Method 2: direct hrefs
460        for m in re.finditer(r'href=["\'](https?://[^"\'<>\s]+)', html):
461            link = m.group(1)
462            if not any(d in link for d in _SEARCH_ENGINE_DOMAINS) and link not in seen:
463                seen.add(link)
464                urls.append(link)
465
466        return urls
467
468    # ── Shared URL ranking ─────────────────────────────────────────────────
469
470    def _pick_best_url(self, results: list[dict], get_url_fn) -> Optional[str]:
471        """
472        From a list of search results, pick the best manual URL.
473        Priority: direct .pdf link > trusted site > any link with 'manual' + 'pdf' signals.
474        """
475        # Tier 1: direct .pdf link
476        for r in results:
477            u = get_url_fn(r)
478            if u.lower().endswith(".pdf"):
479                return u
480
481        # Tier 2: trusted manual site
482        for r in results:
483            u = get_url_fn(r)
484            if any(t in u for t in TRUSTED_SITES):
485                # ManualsLib pages need /download.pdf appended
486                if "manualslib.com" in u and not u.endswith(".pdf"):
487                    return u.rstrip("/") + "/download.pdf"
488                return u
489
490        # Tier 3: URL contains 'manual' or 'pdf' (but not a search engine)
491        for r in results:
492            u = get_url_fn(r)
493            u_lower = u.lower()
494            if u.startswith("http") and ("manual" in u_lower or "pdf" in u_lower):
495                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
496                    return u
497
498        # Tier 4: body/title mentions 'pdf' or 'manual'
499        for r in results:
500            u = get_url_fn(r)
501            text = (r.get("body", "") + r.get("title", "")).lower()
502            if ("pdf" in text or "manual" in text) and u.startswith("http"):
503                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
504                    return u
505
506        return None
507
508    # ── Download ───────────────────────────────────────────────────────────
509
510    async def _download_pdf(self, url: str) -> Optional[bytes]:
511        try:
512            import httpx
513        except ImportError:
514            logger.error(f"[{self.name}] httpx is not installed — cannot download PDF")
515            return None
516
517        headers = {
518            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
519        }
520        try:
521            async with httpx.AsyncClient(follow_redirects=True, timeout=60, headers=headers) as client:
522                resp = await client.get(url)
523                if resp.status_code != 200:
524                    logger.warning(f"[{self.name}] Download returned status {resp.status_code} for: {url}")
525                    return None
526                ct = resp.headers.get("content-type", "")
527                if "pdf" in ct or resp.content[:4] == b"%PDF":
528                    return resp.content
529                # HTML — hunt for embedded PDF link
530                links = re.findall(r'https?://[^\s"\'<>]+\.pdf', resp.text, re.IGNORECASE)
531                if links:
532                    logger.info(f"[{self.name}] Following embedded PDF link: {links[0]}")
533                    r2 = await client.get(links[0])
534                    if r2.status_code == 200 and r2.content[:4] == b"%PDF":
535                        return r2.content
536                logger.warning(f"[{self.name}] URL did not return a PDF: {url} (content-type: {ct})")
537        except Exception as e:
538            logger.warning(f"[{self.name}] Download failed for {url}: {e}")
539        return None
540
541    # ── Extract text ───────────────────────────────────────────────────────
542
543    def _extract_text(self, pdf_bytes: bytes) -> tuple[str, int]:
544        import io
545        try:
546            import pdfplumber
547            parts = []
548            with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
549                pages = len(pdf.pages)
550                for p in pdf.pages:
551                    t = p.extract_text()
552                    if t:
553                        parts.append(t)
554            if parts:
555                return "\n".join(parts), pages
556        except ImportError:
557            logger.warning(f"[{self.name}] pdfplumber not installed — trying pymupdf")
558        except Exception as e:
559            logger.warning(f"[{self.name}] pdfplumber extraction failed: {e}")
560
561        try:
562            import fitz
563            doc   = fitz.open(stream=pdf_bytes, filetype="pdf")
564            parts = [p.get_text() for p in doc]
565            return "\n".join(t for t in parts if t), len(doc)
566        except ImportError:
567            logger.error(f"[{self.name}] Neither pdfplumber nor pymupdf (fitz) installed — cannot extract text")
568        except Exception as e:
569            logger.warning(f"[{self.name}] pymupdf extraction failed: {e}")
570
571        return "", 0
572
573    # ── Ask ────────────────────────────────────────────────────────────────
574
575    async def _ask(self, question: str) -> dict:
576        if not self._manual_text:
577            return {
578                "error":  "No manual loaded yet.",
579                "hint":   'Send: {"action": "load_manual", "device": "Your Device Model"}',
580            }
581        if not self.llm:
582            return {"error": "No LLM configured on this agent."}
583
584        await self._log(f"Answering: {question}")
585
586        chunks  = self._chunk_text(self._manual_text, 600, 100)
587        ranked  = self._rank_chunks(chunks, question)[:6]
588        context = "\n\n---\n\n".join(ranked)
589
590        prompt = (
591            f"You are a helpful assistant. Answer the question below using ONLY the provided manual excerpt.\n\n"
592            f"Device: {self._manual_device}\n\n"
593            f"Manual excerpt:\n{context[:6000]}\n\n"
594            f"Question: {question}\n\n"
595            f"Give a clear, step-by-step answer based on the manual. "
596            f"If the manual doesn't contain the answer, say so."
597        )
598
599        if hasattr(self.llm, "complete"):
600            response, _ = await self.llm.complete(
601                messages=[{"role": "user", "content": prompt}],
602                system="You answer questions strictly based on provided manual content.",
603            )
604        else:
605            response = str(self.llm)
606
607        return {
608            "device":   self._manual_device,
609            "question": question,
610            "answer":   response,
611        }
612
613    # ── Status ─────────────────────────────────────────────────────────────
614
615    def _status(self) -> dict:
616        if not self._manual_device:
617            return {"status": "idle", "message": "No manual loaded."}
618        return {
619            "status":  "loaded",
620            "device":  self._manual_device,
621            "url":     self._manual_url,
622            "pages":   self._manual_pages,
623            "chars":   len(self._manual_text or ""),
624        }
625
626    # ── Helpers ────────────────────────────────────────────────────────────
627
628    _STOPWORDS = {
629        'how','do','i','the','a','an','is','are','what','where','when','why',
630        'can','does','to','for','of','in','on','at','my','this','that','it',
631        'its','with','and','or','be','was','will','has','have','use','using',
632        'get','me','please','tell','about','there','their','they','we','you',
633        'your','which','make','need',
634    }
635
636    def _keywords(self, text: str) -> list[str]:
637        words = re.findall(r'[a-z]+', text.lower())
638        return [w for w in words if w not in self._STOPWORDS and len(w) > 2]
639
640    def _chunk_text(self, text: str, chunk_size=600, overlap=100) -> list[str]:
641        words  = text.split()
642        chunks = []
643        i = 0
644        while i < len(words):
645            chunks.append(" ".join(words[i:i + chunk_size]))
646            i += chunk_size - overlap
647        return chunks
648
649    def _rank_chunks(self, chunks: list[str], question: str) -> list[str]:
650        kws    = self._keywords(question)
651        scored = [(sum(c.lower().count(kw) for kw in kws), c) for c in chunks]
652        scored.sort(key=lambda x: x[0], reverse=True)
653        return [c for _, c in scored]
654
655    # ── MQTT helpers ───────────────────────────────────────────────────────
656
657    async def _log(self, msg: str):
658        logger.info(f"[{self.name}] {msg}")
659        await self._mqtt_publish(
660            f"agents/{self.actor_id}/logs",
661            {"type": "log", "message": msg, "timestamp": time.time()},
662        )
663
664    async def _alert(self, msg: str, severity: str = "warning"):
665        logger.warning(f"[{self.name}] ALERT: {msg}")
666        await self._mqtt_publish(
667            f"agents/{self.actor_id}/alerts",
668            {"message": msg, "severity": severity, "timestamp": time.time()},
669        )

Pre-defined agent that finds, downloads, and answers questions from device manuals. Requires: httpx (+ pdfplumber or pymupdf for PDF extraction)

ManualAgent(llm_provider=None, **kwargs)
43    def __init__(self, llm_provider=None, **kwargs):
44        kwargs.setdefault("name", "manual-agent")
45        super().__init__(**kwargs)
46        self.llm              = llm_provider
47        self._manual_text:    Optional[str]  = None
48        self._manual_device:  Optional[str]  = None
49        self._manual_url:     Optional[str]  = None
50        self._manual_pages:   int            = 0
llm
async def on_start(self):
57    async def on_start(self):
58        await self._mqtt_publish(
59            f"agents/{self.actor_id}/logs",
60            {"type": "log", "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.", "timestamp": time.time()},
61        )
62        logger.info(f"[{self.name}] Ready.")

Called when actor starts. Override for init logic.

async def chat(self, message: str) -> str:
66    async def chat(self, message: str) -> str:
67        """
68        Synchronous-style entry point for CLIInterface and other direct callers.
69        Parses the message as JSON payload or plain-text question, executes the
70        action, and returns a human-readable string response.
71        """
72        payload = None
73        stripped = message.strip()
74        if stripped.startswith("{"):
75            try:
76                payload = json.loads(stripped)
77            except json.JSONDecodeError:
78                pass
79
80        if payload and isinstance(payload, dict):
81            result = await self._handle_task_payload(payload)
82        else:
83            if self._manual_text:
84                result = await self._ask(stripped)
85            else:
86                result = {
87                    "error": "No manual loaded yet.",
88                    "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
89                }
90
91        return self._format_result(result)

Synchronous-style entry point for CLIInterface and other direct callers. Parses the message as JSON payload or plain-text question, executes the action, and returns a human-readable string response.

async def handle_message(self, msg: Message):
126    async def handle_message(self, msg: Message):
127        if msg.type == MessageType.TASK:
128            try:
129                result = await self._handle_task(msg)
130            except Exception as e:
131                logger.error(f"[{self.name}] Task handling failed: {e}", exc_info=True)
132                result = {"error": f"Internal error: {e}"}
133
134            target = msg.reply_to or msg.sender_id
135            if target:
136                await self.send(target, MessageType.RESULT, result)
137            else:
138                logger.warning(
139                    f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
140                    f"sender_id={msg.sender_id!r}). Result discarded: {result}"
141                )

Handle messages not caught by default handlers.

class PlannerAgent(wactorz.Actor):
  37class PlannerAgent(Actor):
  38    """
  39    On-demand orchestrator. Spawned per complex task, self-terminates when done.
  40    """
  41
  def __init__(
      self,
      llm_provider: Optional[LLMProvider] = None,
      task: str = "",
      reply_to_id: str = "",
      reply_task_id: str = "",
      auto_terminate: bool = True,
      **kwargs,
  ):
      """
      Set up a planner for one orchestration task.

      Args:
          llm_provider: LLM backend, stored as ``self.llm``.
          task: Task text to plan for; may be empty when the task arrives
              later as a message.
          reply_to_id: Actor id that receives the final RESULT message.
          reply_task_id: Task id echoed back as ``_task_id`` in the reply.
          auto_terminate: Whether a deferred stop is scheduled once a
              one-shot plan has completed.
          **kwargs: Forwarded to the base-class constructor; ``name``
              defaults to ``"planner"`` when not supplied.
      """
      if "name" not in kwargs:
          kwargs["name"] = "planner"
      super().__init__(**kwargs)

      # Collaborators and task context.
      self.llm = llm_provider
      self._task = task
      self._auto_terminate = auto_terminate

      # Where (and under which task id) the final answer is reported.
      self._reply_to_id = reply_to_id
      self._reply_task_id = reply_task_id

      # Futures keyed by task id, resolved by incoming RESULT messages.
      self._result_futures: dict[str, asyncio.Future] = {}
      # Names of the agents this planner created during the current run.
      self._spawned_by_planner: list[str] = []
  60
  61    def _current_task_description(self) -> str:
  62        return self._task[:60] if self._task else "waiting for task"
  63
  64    # ── Lifecycle ──────────────────────────────────────────────────────────
  65
  async def on_start(self):
      """
      Announce readiness and start the spawn-time task, if one was given.

      The plan is launched as a separate asyncio task, so this hook returns
      without waiting for the plan to finish.
      """
      await self._log(f"Planner ready. Task: {self._task[:80]}")
      if self._task:
          # The event loop keeps only weak references to tasks, so a task
          # nobody references may be garbage-collected before it finishes.
          # Holding it on the instance keeps the plan alive until done.
          self._plan_task = asyncio.create_task(self._report_plan(self._task))
  70
  71    # ── Message handling ───────────────────────────────────────────────────
  72
  async def handle_message(self, msg: Message):
      """
      Dispatch incoming TASK and RESULT messages.

      TASK: run the plan for the task text and, when a reply target is
      known, send the answer back as a RESULT message.
      RESULT: resolve the pending future registered under the payload's
      ``_task_id``, if there is one that is not already done.
      Other message types are ignored.
      """
      if msg.type == MessageType.TASK:
          raw = msg.payload
          payload = raw if isinstance(raw, dict) else {"text": str(raw)}
          task_text = payload.get("text") or payload.get("task") or str(raw)

          # Reply-target precedence: payload override, message reply_to,
          # message sender, then whatever was configured earlier.
          self._reply_to_id = (
              payload.get("_reply_to")
              or msg.reply_to
              or msg.sender_id
              or self._reply_to_id
          )
          message_task_id = payload.get("_task_id")

          await self._log(f"Received task: {task_text[:80]}")
          answer = await self._run_plan(task_text)

          if not self._reply_to_id:
              return

          reply = {"result": answer, "text": answer}
          # Prefer the initiating task id (from main) so its future resolves;
          # fall back to the message-level task id when present.
          resolve_id = self._reply_task_id or message_task_id
          if resolve_id:
              reply["_task_id"] = resolve_id
          if self._spawned_by_planner:
              reply["spawned"] = self._spawned_by_planner
          await self.send(self._reply_to_id, MessageType.RESULT, reply)
          return

      if msg.type != MessageType.RESULT:
          return

      payload = msg.payload if isinstance(msg.payload, dict) else {}
      task_id = payload.get("_task_id")
      if not task_id or task_id not in self._result_futures:
          return

      waiter = self._result_futures[task_id]
      if not waiter.done():
          waiter.set_result(payload)
  99
 100    # ── Report wrapper (on_start path) ────────────────────────────────────
 101
 102    async def _report_plan(self, task: str):
 103        """Run the plan and report the result back to main (used when task set at spawn time)."""
 104        result = await self._run_plan(task)
 105        if self._reply_to_id:
 106            reply = {"result": result, "text": result}
 107            if self._reply_task_id:
 108                reply["_task_id"] = self._reply_task_id
 109            if self._spawned_by_planner:
 110                reply["spawned"] = self._spawned_by_planner
 111            await self.send(self._reply_to_id, MessageType.RESULT, reply)
 112
 113    # ── Core pipeline ──────────────────────────────────────────────────────
 114
 115    # ── Pipeline registry ──────────────────────────────────────────────────
 116    # Each pipeline rule is stored here so users can list / delete them later.
 117    # Stored in persistent state under key "_pipeline_rules".
 118    #
 119    # Schema per rule:
 120    # {
 121    #   "rule_id":    str,       # unique slug
 122    #   "task":       str,       # original user request
 123    #   "agents":     [str],     # names of spawned agents for this rule
 124    #   "created_at": float,
 125    # }
 126
 127    def _load_pipeline_rules(self) -> list[dict]:
 128        return self.recall("_pipeline_rules") or []
 129
 130    def _save_pipeline_rule(self, rule: dict):
 131        rules = self._load_pipeline_rules()
 132        rules = [r for r in rules if r.get("rule_id") != rule["rule_id"]]
 133        rules.append(rule)
 134        self.persist("_pipeline_rules", rules)
 135
 136    # ── Pipeline detection & dispatch ──────────────────────────────────────
 137
 138    def _is_pipeline_request(task: str) -> bool:
 139        """
 140        Detect reactive/persistent pipeline requests vs one-shot tasks.
 141        Pipelines use conditional/temporal language: if/when/whenever/monitor/watch/notify.
 142        """
 143        import re
 144        lowered = task.lower()
 145
 146        # Explicit pipeline prefix always wins
 147        if lowered.startswith("pipeline:") or lowered.startswith("pipeline "):
 148            return True
 149
 150        patterns = [
 151            r"\bif\b.*\bthen\b",
 152            r"\bif\b.*\b(send|notify|alert|turn|open|close|post|message)\b",
 153            r"\bwhen\b.*\b(detect|open|turn|send|notify|alert|is|becomes|goes|changes)\b",
 154            r"\bwhenever\b",
 155            r"\bmonitor\b", r"\bwatch\b",
 156            r"\balert me\b", r"\bnotify me\b",
 157            r"\bsend me\b.*\b(when|if|discord|message|notification)\b",
 158            r"\bsend me a\b",
 159            r"\bautomatically\b",
 160            r"\bevery time\b", r"\bon detection\b",
 161            r"\bis turned on\b", r"\bis turned off\b",
 162            r"\bturns on\b", r"\bturns off\b",
 163            r"\bopens\b.*\b(send|notify|alert|light|turn)\b",
 164            r"\b(door|window|sensor|lamp|light|temperature|humidity|motion)\b.*\b(send|notify|discord|message)\b",
 165            # camera/detect + action = pipeline
 166            r"\b(camera|detect|yolo|webcam)\b.*\b(turn|open|send|notify|alert)\b",
 167            r"\b(person|motion|object)\b.*\bdetect.*\b(turn|open|light|send)\b",
 168        ]
 169        return any(re.search(p, lowered) for p in patterns)
 170
 async def _run_plan(self, task: str) -> str:
     """
     Run ``task`` end to end and return the final answer text.

     Reactive/persistent requests are handed to ``_run_pipeline``. One-shot
     tasks go through: plan lookup (cache, else LLM decomposition), spawning
     of agents the plan needs, execution, synthesis, and caching of a
     freshly generated plan.

     Args:
         task: The user request to fulfil.

     Returns:
         The synthesized answer, the pipeline summary, or a direct LLM
         answer when decomposition produced no plan.
     """
     workers = self._discover_workers()
     await self._log(f"Workers available: {[w['name'] for w in workers]}")

     # Detect pipeline vs one-shot
     if PlannerAgent._is_pipeline_request(task):
         await self._log("Pipeline request detected — spawning persistent agents...")
         return await self._run_pipeline(task, workers)

     # ── 1. Check cache ─────────────────────────────────────────────────
     cache_key = _task_hash(task)
     cached = self._load_cached_plan(cache_key, workers)
     if cached:
         await self._log(f"Cache hit — reusing plan ({len(cached)} steps)")
         plan = cached
     else:
         await self._log("No cache hit — generating plan with LLM...")
         plan = await self._decompose(task, workers)
         if not plan:
             await self._log("Decomposition failed — answering directly")
             # NOTE(review): this early return skips the auto-terminate
             # step at the end of the method — confirm that is intended.
             return await self._llm_answer(task)

     # ── 2. Spawn any missing agents declared in the plan ───────────────
     plan = await self._ensure_agents(plan)

     # ── 3. Execute ─────────────────────────────────────────────────────
     await self._log(f"Executing {len(plan)} step(s)...")
     results = await self._execute(plan)

     # ── 4. Synthesize ──────────────────────────────────────────────────
     answer = await self._synthesize(task, plan, results)

     # ── 5. Cache the plan when it was freshly generated ────────────────
     if not cached:
         self._save_plan_cache(cache_key, task, plan)
         await self._log("Plan cached for future reuse.")

     await self._log("Task complete.")
     if self._auto_terminate:
         # Keep a reference: the event loop only holds weak references to
         # tasks, so an unreferenced stop task could be garbage-collected
         # before it runs.
         self._stop_task = asyncio.create_task(self._deferred_stop())

     return answer
 214
 215    # ── Pipeline mode (persistent reactive agents) ─────────────────────────
 216
 217
 218    async def _run_pipeline(self, task: str, workers: list[dict]) -> str:
 219        """
 220        Builds and spawns persistent reactive agents for if/when/whenever rules.
 221
 222        Flow:
 223          1. _decompose_pipeline queries HomeAssistantAgent for real entity IDs
 224          2. LLM produces spawn configs (ha_actuator for HA actions, dynamic for everything else)
 225          3. Each agent is spawned and registered in main's spawn registry
 226          4. Rule is saved so it can be listed/deleted later
 227          5. Summary returned to the user
 228
 229        Multiple rules in one request are fully supported.
 230        """
 231        plan = await self._decompose_pipeline(task, workers)
 232
 233        if not plan:
 234            await self._log("Pipeline decomposition failed — falling back to direct answer")
 235            return await self._llm_answer(task)
 236
 237        if len(plan) == 1 and "_feasibility_error" in plan[0]:
 238            error = plan[0]["_feasibility_error"]
 239            await self._log(f"Pipeline not feasible: {error}")
 240            return f"Cannot set up this pipeline:\n\n{error}"
 241
 242        await self._log(f"Pipeline plan: {len(plan)} agent(s)")
 243        spawned: list[str] = []
 244        wired: list[str] = []
 245        rule_agents: list[str] = []
 246
 247        for step in plan:
 248            name = step.get("name", "").strip()
 249            description = step.get("description", "")
 250            spawn_cfg = step.get("spawn_config")
 251
 252            if not name:
 253                await self._log("Step missing name — skipping")
 254                continue
 255
 256            if self._registry and self._registry.find_by_name(name):
 257                await self._log(f"'{name}' already running — skipping")
 258                wired.append(f"**{name}** (already active)")
 259                rule_agents.append(name)
 260                continue
 261
 262            if not spawn_cfg:
 263                await self._log(f"Step '{name}' has no spawn_config — skipping")
 264                continue
 265
 266            spawn_cfg = dict(spawn_cfg)
 267            spawn_cfg["name"] = name
 268
 269            spawn_type = spawn_cfg.get("type", "dynamic")
 270            await self._log(f"Spawning '{name}' (type={spawn_type})...")
 271            try:
 272                actor = await self._spawn_agent(spawn_cfg)
 273            except Exception as e:
 274                await self._log(f"Spawn failed for '{name}': {e}")
 275                wired.append(f"**{name}** — spawn failed: {e}")
 276                continue
 277
 278            if actor:
 279                self._spawned_by_planner.append(name)
 280                spawned.append(name)
 281                rule_agents.append(name)
 282
 283                # Register in main's spawn registry for auto-restore on restart
 284                if self._registry:
 285                    main = self._registry.find_by_name("main")
 286                    if main and hasattr(main, "_save_to_spawn_registry"):
 287                        registry_cfg = dict(spawn_cfg)
 288                        registry_cfg["name"] = name
 289                        registry_cfg["_rule"] = True
 290                        registry_cfg["_rule_task"] = task[:200]
 291                        main._save_to_spawn_registry(registry_cfg)
 292
 293                topics = spawn_cfg.get("mqtt_topics", [])
 294                label = f"**{name}** — {description}"
 295                if topics:
 296                    label += "\n  listens: " + ", ".join(topics)
 297                wired.append(label)
 298                await asyncio.sleep(0.3)
 299            else:
 300                wired.append(f"**{name}** — failed to spawn")
 301
 302        # Persist this rule into main's pipeline rules registry
 303        if rule_agents:
 304            import hashlib as _hl
 305            rule_id = _hl.md5(task.encode()).hexdigest()[:8]
 306            rule = {
 307                "rule_id": rule_id,
 308                "task": task,
 309                "agents": rule_agents,
 310                "created_at": time.time(),
 311            }
 312            # Save into main so it survives planner self-termination
 313            if self._registry:
 314                main = self._registry.find_by_name("main")
 315                if main and hasattr(main, "save_pipeline_rule"):
 316                    main.save_pipeline_rule(rule)
 317                    logger.info(f"[{self.name}] Pipeline rule {rule_id} saved to main")
 318
 319        self._auto_terminate = False
 320
 321        if not wired:
 322            return "Pipeline plan generated but no agents could be spawned. Check logs."
 323
 324        out = ["Pipeline active! Here's what I set up:\n"]
 325        out += [f"{i+1}. {w}" for i, w in enumerate(wired)]
 326        out.append("\nThese agents run continuously and react to events automatically.")
 327        out.append("Use `/rules` to see all active pipeline rules.")
 328        if spawned:
 329            out.append(f"\nSpawned: {', '.join(spawned)} — will auto-restore on restart.")
 330        return "\n".join(out)
 331
 332    async def _decompose_pipeline(self, task: str, workers: list[dict]) -> list[dict]:
 333        """
 334        Decomposes a reactive pipeline request into persistent agent spawn configs.
 335
 336        Flow:
 337          1. Query HomeAssistantAgent for live entities (delegates — no duplication)
 338          2. Feasibility check — surface clear error if required HA entities are missing
 339          3. LLM produces spawn configs with real entity IDs and correct MQTT wiring
 340        """
 341        if not self.llm:
 342            return []
 343
 344        # ── 1. Get HA entities via HomeAssistantAgent ──────────────────────
 345        ha_entities_text = ""
 346        ha_available = False
 347
 348        try:
 349            if self._registry and self._registry.find_by_name("home-assistant-agent"):
 350                result = await self._delegate("home-assistant-agent", "list_entities")
 351                if result and not result.get("error"):
 352                    entities_list = result.get("entities", [])
 353                    if entities_list:
 354                        lines = []
 355                        for e in entities_list[:200]:
 356                            eid = e.get("entity_id", "")
 357                            ename = e.get("name", "")
 358                            plat = e.get("platform", "")
 359                            if eid:
 360                                parts = [eid]
 361                                if ename and ename != eid:
 362                                    parts.append(f"name={ename}")
 363                                if plat:
 364                                    parts.append(f"platform={plat}")
 365                                lines.append("  " + "  ".join(parts))
 366                        ha_entities_text = "\n".join(lines)
 367                        ha_available = True
 368                        logger.info(f"[{self.name}] Got {len(entities_list)} HA entities via home-assistant-agent")
 369        except Exception as e:
 370            logger.warning(f"[{self.name}] Could not query home-assistant-agent: {e}")
 371
 372        # Fallback: fetch directly if HA agent is unavailable
 373        if not ha_available:
 374            try:
 375                from ..config import CONFIG
 376                from ..core.integrations.home_assistant.ha_helper import fetch_devices_entities_with_location
 377                ha_url = (CONFIG.ha_url or "").rstrip("/")
 378                ha_token = (CONFIG.ha_token or "").strip()
 379                if ha_url and ha_token:
 380                    devices = await fetch_devices_entities_with_location(ha_url, ha_token, include_states=True)
 381                    lines = []
 382                    for device in devices[:150]:
 383                        area = device.get("area", "")
 384                        for entity in device.get("entities", []):
 385                            eid = entity.get("entity_id", "")
 386                            ename = entity.get("friendly_name") or entity.get("name", "")
 387                            state = entity.get("state", "")
 388                            if eid:
 389                                parts = [eid]
 390                                if ename: parts.append(f"name={ename}")
 391                                if area: parts.append(f"area={area}")
 392                                if state: parts.append(f"state={state}")
 393                                lines.append("  " + "  ".join(parts))
 394                    ha_entities_text = "\n".join(lines)
 395                    ha_available = bool(lines)
 396                    logger.info(f"[{self.name}] Direct HA fetch: {len(lines)} entities")
 397            except Exception as e:
 398                logger.warning(f"[{self.name}] Direct HA fetch failed: {e}")
 399
 400        ha_section = ha_entities_text if ha_entities_text else \
 401            "  (HA not reachable — use entity IDs provided by the user)"
 402
 403        # ── Fetch stored notification URLs from main ──────────────────────
 404        notification_urls: dict = {}
 405        if self._registry:
 406            main = self._registry.find_by_name("main")
 407            if main and hasattr(main, "get_notification_urls"):
 408                notification_urls = main.get_notification_urls()
 409
 410        # Also extract any URL directly mentioned in the task
 411        import re as _re
 412        _url_match = _re.search(
 413            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
 414            task
 415        )
 416        if _url_match:
 417            url = _url_match.group(0).rstrip(".,;!)'\"")
 418            if "discord" in url:
 419                notification_urls["discord"] = url
 420            elif "slack" in url:
 421                notification_urls["slack"] = url
 422            elif "telegram" in url:
 423                notification_urls["telegram"] = url
 424
 425        notif_section = ""
 426        if notification_urls:
 427            lines = ["NOTIFICATION URLS (use these directly in code — do not use placeholders):"]
 428            for svc, url in notification_urls.items():
 429                lines.append(f"  {svc}: {url}")
 430            notif_section = "\n".join(lines)
 431        else:
 432            notif_section = (
 433                "NOTIFICATION URLS: none stored.\n"
 434                "If the user wants Discord/Slack/Telegram notifications and no URL is available,\n"
 435                "use a placeholder 'WEBHOOK_URL_REQUIRED' and set description to explain the user must run:\n"
 436                "  /webhook discord <url>"
 437            )
 438        _local_kw = ("camera", "webcam", "laptop", "detect", "yolo", "person",
 439                     "object detection", "cv2", "opencv",
 440                     "discord", "telegram", "slack", "notify", "notification", "message")
 441        _skip_feasibility = any(kw in task.lower() for kw in _local_kw)
 442
 443        if ha_available and ha_entities_text and not _skip_feasibility:
 444            feas_prompt = (
 445                "Check if this reactive automation can be fulfilled with available HA entities.\n\n"
 446                f"USER REQUEST: {task}\n\n"
 447                f"AVAILABLE HA ENTITIES:\n{ha_section}\n\n"
 448                'Return JSON only:\n'
 449                '{"feasible": true/false, "reason": "<one sentence if not feasible>", "relevant_entities": ["entity_id", ...]}\n\n'
 450                "Rules:\n"
 451                "- feasible=true only if ALL required entity types exist\n"
 452                "- Camera/webcam/Discord/notification requests: always feasible=true"
 453            )
 454            try:
 455                feas_resp, _ = await self.llm.complete(
 456                    messages=[{"role": "user", "content": feas_prompt}],
 457                    system="Output only valid JSON. No markdown.",
 458                    max_tokens=400,
 459                )
 460                clean = feas_resp.strip()
 461                for fence in ("```json", "```"):
 462                    if clean.startswith(fence):
 463                        clean = clean[len(fence):]
 464                    if clean.endswith("```"):
 465                        clean = clean[:-3]
 466                clean = clean.strip()
 467                feas = json.loads(clean)
 468                if not feas.get("feasible", True):
 469                    reason = feas.get("reason", "Cannot fulfill request with available HA entities.")
 470                    logger.warning(f"[{self.name}] Feasibility failed: {reason}")
 471                    return [{"_feasibility_error": reason}]
 472                logger.info(f"[{self.name}] Feasibility OK — relevant: {feas.get('relevant_entities', [])}")
 473            except Exception as e:
 474                logger.warning(f"[{self.name}] Feasibility check error (continuing): {e}")
 475
 476        # ── 3. Decompose into spawn configs ────────────────────────────────
 477        # Build the prompt as a list of parts to avoid f-string escape issues
 478        prompt_parts = [
 479            "You are designing reactive automation pipelines for a multi-agent IoT system.",
 480            "Output ONLY a valid JSON array — no explanation, no markdown, no code fences.",
 481            "",
 482            "═══ SYSTEM ARCHITECTURE ═══",
 483            "",
 484            "HomeAssistantStateBridgeAgent (ALWAYS running, NEVER spawn again):",
 485            "  Publishes every HA state change to MQTT.",
 486            "  Topic format depends on HA_STATE_BRIDGE_PER_ENTITY config — can be either:",
 487            "    Flat:       homeassistant/state_changes                          (all entities, one topic)",
 488            "    Per-entity: homeassistant/state_changes/{domain}/{full_entity_id} (one topic per entity)",
 489            "  ALWAYS subscribe to the wildcard: homeassistant/state_changes/#",
 490            "  This catches BOTH formats and never breaks regardless of config.",
 491            '  Payload always contains: {"entity_id": "light.wiz_...", "domain": "light", "new_state": {"state": "on", ...}, "old_state": {...}}',
 492            "  Filter by entity_id IN THE PAYLOAD — never rely on the topic path for filtering.",
 493            "  NOTE: 'state' is NESTED inside new_state — check payload['new_state']['state'].",
 494            "",
 495            "═══ AGENT TYPES ═══",
 496            "",
 497            'TYPE 1 — "ha_actuator"',
 498            "  Purpose: call any Home Assistant service (turn_on, turn_off, set_temperature, open_cover, etc.)",
 499            "  No code needed. Subscribes to an MQTT trigger topic and calls the HA service.",
 500            "  detection_filter matches TOP-LEVEL keys of the incoming payload only.",
 501            "  spawn_config schema:",
 502            '    "type": "ha_actuator"',
 503            '    "automation_id": "<unique-kebab-id>"',
 504            '    "description": "<what this does>"',
 505            '    "mqtt_topics": ["<trigger-topic>"]',
 506            '    "actions": [{"domain": "<ha-domain>", "service": "<ha-service>", "entity_id": "<entity_id-from-list>", "service_data": {}}]',
 507            '    "conditions": []',
 508            '    "detection_filter": {"<top-level-key>": <value>} or null',
 509            '    "cooldown_seconds": <number>',
 510            "",
 511            'TYPE 2 — "dynamic"',
 512            "  Purpose: any logic that needs code — state filtering, webcam, timers, HTTP webhooks, Discord, etc.",
 513            "  Define these async functions (all optional except at least one must exist):",
 514            "    async def setup(agent)   — runs once on start, good for subscriptions and init",
 515            "    async def process(agent) — runs in a loop every poll_interval seconds",
 516            "  Available APIs (ONLY these — no other agent methods exist):",
 517            '    await agent.log("message")                        — structured log',
 518            '    await agent.publish("topic", {dict})              — publish to MQTT',
 519            '    agent.subscribe("topic", async_callback)          — subscribe to MQTT, callback(payload_dict) per message',
 520            '                                                        IMPORTANT: runs as background task, setup() returns immediately',
 521            '    agent.recall("key")                               — load persisted value',
 522            '    agent.persist("key", value)                       — save persisted value',
 523            '    agent.state["key"]                                — in-memory dict (cleared on restart)',
 524            "  CRITICAL RULES FOR DYNAMIC AGENT CODE:",
 525            "    NEVER import or use aiomqtt directly — use agent.subscribe() instead",
 526            "    NEVER hardcode MQTT broker hostnames or ports — agent.subscribe() handles this automatically",
 527            "    NEVER use asyncio.create_task() for MQTT — agent.subscribe() already creates the background task",
 528            "    agent.subscribe() is non-blocking — call it in setup() and return immediately",
 529            "  spawn_config schema:",
 530            '    "type": "dynamic"',
 531            '    "description": "<what this does>"',
 532            '    "install": ["<pip-package>", ...]       — packages to install before running',
 533            '    "poll_interval": <seconds>              — how often process(agent) runs',
 534            '    "code": "<full python source as single string with \\n for newlines>"',
 535            "",
 536            "═══ CANONICAL WIRING PATTERNS ═══",
 537            "",
 538            "PATTERN 1 — HA sensor triggers HA action (door → light, motion → switch, temp → AC):",
 539            "  Problem: HA state is nested in new_state.state, ha_actuator can only filter top-level keys.",
 540            "  Solution: use a dynamic filter agent to extract and re-publish the trigger.",
 541            "  Agent 1 (dynamic, name: '<slug>-state-filter'):",
 542            "    setup(agent): use agent.subscribe() to listen to homeassistant/state_changes/{domain}/{entity_id}",
 543            "      Check new_state['state'] against condition, if met: await agent.publish('custom/triggers/<slug>', {'triggered': True})",
 544            "    agent.subscribe() runs as a background task — setup() must return immediately after calling it.",
 545            "  Agent 2 (ha_actuator, name: '<slug>-actuator'):",
 546            "    mqtt_topics: ['custom/triggers/<slug>']",
 547            "    detection_filter: {'triggered': True}",
 548            "    actions: [the HA service call with the correct entity_id]",
 549            "  CONDITION EXAMPLES:",
 550            "    Binary sensor (door/window/motion): new_state['state'] == 'on'",
 551            "    Numeric sensor (temperature/humidity): float(new_state.get('state', 0)) > threshold",
 552            "    Switch/light: new_state['state'] == 'on' or 'off'",
 553            "  PATTERN 1 CODE TEMPLATE:",
 554            "    async def setup(agent):",
 555            "        async def on_state(payload):",
 556            "            if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return",
 557            "            state = payload.get('new_state', {}).get('state', '')",
 558            "            if state == 'on':  # adapt condition to user request",
 559            "                await agent.publish('custom/triggers/<slug>', {'triggered': True, 'state': state})",
 560            "        # Use wildcard — works regardless of per-entity or flat topic config",
 561            "        agent.subscribe('homeassistant/state_changes/#', on_state)",
 562            "",
 563            "PATTERN 2 — HA sensor triggers notification (Discord, Slack, HTTP webhook):",
 564            "  ONE dynamic agent using agent.subscribe():",
 565            "    async def setup(agent):",
 566            "        async def on_state(payload):",
 567            "            if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return",
 568            "            state = payload.get('new_state', {}).get('state', '')",
 569            "            if state == 'on':  # adapt condition",
 570            "                import httpx",
 571            "                async with httpx.AsyncClient() as c:",
 572            "                    await c.post('<WEBHOOK_URL>', json={'content': 'Lamp turned on!'})",
 573            "                await agent.log('Discord notification sent')",
 574            "        # Use wildcard — works regardless of per-entity or flat topic config",
 575            "        agent.subscribe('homeassistant/state_changes/#', on_state)",
 576            "  Install: httpx",
 577            "  IMPORTANT: use the exact webhook URL from NOTIFICATION URLS section below.",
 578            "",
 579            "PATTERN 3 — Webcam/camera object detection triggers HA action:",
 580            "  Agent 1 (dynamic, name: '<slug>-camera-detect'):",
 581            "    setup(agent): load YOLO model and open camera",
 582            "    process(agent): capture frame, run inference, determine if target object is detected,",
 583            "      publish {'detected': bool, 'target': '<object-name>', 'objects': [list-of-all-detected]}",
 584            "      to custom/detections/<slug>",
 585            "    Install: ultralytics, opencv-python",
 586            "    poll_interval: 1",
 587            "  Agent 2 (ha_actuator, name: '<slug>-actuator'):",
 588            "    mqtt_topics: ['custom/detections/<slug>']",
 589            "    detection_filter: {'detected': True}",
 590            "    actions: [HA service call]",
 591            "  IMPORTANT: publish {'detected': bool} not {'person_detected': bool} — generic for any object.",
 592            "  In code: target = '<object-name-from-user-request>'; detected = target in set(detected_labels)",
 593            "",
 594            "PATTERN 4 — Webcam detection triggers notification:",
 595            "  Agent 1: same as Pattern 3 agent 1",
 596            "  Agent 2 (dynamic, name: '<slug>-notify'):",
 597            "    setup(agent): use agent.subscribe() on custom/detections/<slug>",
 598            "      When detected=True: POST notification via httpx",
 599            "",
 600            "PATTERN 5 — Timer/schedule triggers HA action:",
 601            "  Agent 1 (dynamic, name: '<slug>-timer'):",
 602            "    process(agent): check current time (import datetime), if matches schedule:",
 603            "      await agent.publish('custom/triggers/<slug>', {'triggered': True})",
 604            "    poll_interval: 60",
 605            "  Agent 2 (ha_actuator): subscribes to custom/triggers/<slug>",
 606            "",
 607            "═══ GENERAL RULES ═══",
 608            "- Use EXACT entity_id values from the HA entities list — never invent entity IDs",
 609            "- For HA service calls: look up the correct domain and service for the entity type",
 610            "  light → light.turn_on / light.turn_off",
 611            "  switch → switch.turn_on / switch.turn_off",
 612            "  climate → climate.set_temperature / climate.set_hvac_mode",
 613            "  cover → cover.open_cover / cover.close_cover",
 614            "  script → script.turn_on",
 615            "- Multiple rules in one request → output ALL agents for ALL rules",
 616            "- Each agent does exactly ONE job — keep it minimal",
 617            "- Replace <slug> consistently across paired agents with a short descriptive kebab-case id",
 618            "- ALWAYS subscribe to homeassistant/state_changes/# (wildcard) — NEVER to a specific sub-topic",
 619            "  Filter by entity_id in the payload: if payload.get('entity_id') != 'light.xyz': return",
 620            "  This works regardless of whether HA_STATE_BRIDGE_PER_ENTITY is on or off",
 621            "- If user provides a Discord webhook URL, use it directly in code",
 622            "- If user provides a condition threshold (e.g. 'above 28 degrees'), encode it in the filter agent code",
 623            "- Dynamic agent code must be a single string with actual \\n newlines (not literal backslash-n)",
 624            "",
 625            "═══ HOME ASSISTANT ENTITIES ═══",
 626            ha_section,
 627            "",
 628            "═══ NOTIFICATION URLS ═══",
 629            notif_section,
 630            "",
 631            "═══ OUTPUT FORMAT ═══",
 632            "JSON array. Each element:",
 633            '{"name": "<unique-kebab-name>", "description": "<one sentence>", "spawn_config": {<full spawn_config>}}',
 634            "",
 635            "═══ USER REQUEST ═══",
 636            task,
 637        ]
 638        prompt = "\n".join(prompt_parts)
 639
 640        try:
 641            response, _ = await self.llm.complete(
 642                messages=[{"role": "user", "content": prompt}],
 643                system="You are a JSON-only pipeline architect. Output only a valid JSON array. No markdown, no explanation.",
 644                max_tokens=4000,
 645            )
 646            clean = response.strip()
 647            if clean.startswith("```"):
 648                clean = "\n".join(clean.split("\n")[1:])
 649            if "```" in clean:
 650                clean = clean[:clean.rfind("```")]
 651            start = clean.find("[")
 652            end = clean.rfind("]")
 653            if start != -1 and end != -1:
 654                clean = clean[start:end + 1]
 655            plan = json.loads(clean.strip())
 656            if isinstance(plan, list):
 657                # Validate generated code — catch common LLM mistakes
 658                plan = self._validate_pipeline_code(plan)
 659                logger.info(f"[{self.name}] Pipeline plan: {len(plan)} step(s)")
 660                for i, step in enumerate(plan):
 661                    sc = step.get("spawn_config", {})
 662                    logger.info(
 663                        f"[{self.name}]   step {i + 1}: name={step.get('name')}  "
 664                        f"type={sc.get('type')}  topics={sc.get('mqtt_topics', [])}"
 665                    )
 666                return plan
 667        except Exception as e:
 668            logger.error(f"[{self.name}] Pipeline decomposition error: {e}")
 669        return []
 670
 671    # ── Pipeline code validator ────────────────────────────────────────────
 672
 673    def _validate_pipeline_code(self, plan: list[dict]) -> list[dict]:
 674        """
 675        Scan generated dynamic agent code for common LLM mistakes and fix them.
 676        Currently catches:
 677          - Raw aiomqtt.Client() usage (should use agent.subscribe() instead)
 678          - Hardcoded MQTT broker hostnames
 679        Logs warnings so the user knows what was fixed.
 680        """
 681        import re as _re
 682        for step in plan:
 683            sc = step.get("spawn_config", {})
 684            if sc.get("type") != "dynamic":
 685                continue
 686            code = sc.get("code", "")
 687            if not code:
 688                continue
 689
 690            issues = []
 691
 692            # Detect raw aiomqtt.Client() — LLM should use agent.subscribe()
 693            if "aiomqtt.Client(" in code or "aiomqtt.connect(" in code:
 694                issues.append("raw aiomqtt.Client() — should use agent.subscribe()")
 695                # Attempt to rewrite: extract topic and replace entire aiomqtt block
 696                # with agent.subscribe() pattern
 697                topics = _re.findall(r'await\s+client\.subscribe\(["\']([^"\']+)["\']', code)
 698                if topics:
 699                    topic = topics[0]
 700                    # Build replacement code using agent.subscribe()
 701                    fixed = self._rewrite_aiomqtt_to_subscribe(code, topic)
 702                    if fixed:
 703                        sc["code"] = fixed
 704                        code = fixed
 705                        logger.info(f"[{self.name}] Auto-fixed raw aiomqtt in '{step.get('name')}' → agent.subscribe('{topic}')")
 706
 707            if issues:
 708                logger.warning(
 709                    f"[{self.name}] Code issues in '{step.get('name')}': {'; '.join(issues)}"
 710                )
 711
 712        return plan
 713
 714    @staticmethod
 715    def _rewrite_aiomqtt_to_subscribe(code: str, topic: str) -> str:
 716        """
 717        Best-effort rewrite of raw aiomqtt MQTT subscription code to use agent.subscribe().
 718        Extracts the message handling callback and rewires it.
 719        Returns empty string if rewrite fails (original code kept).
 720        """
 721        import re as _re
 722
 723        # Try to extract the callback body — look for the inner async for loop body
 724        # Pattern: async for msg/message in client.messages: ... payload handling ...
 725        match = _re.search(
 726            r'async\s+for\s+\w+\s+in\s+client\.messages:\s*\n(.*?)(?=\n\s*except|\n\s*$)',
 727            code,
 728            _re.DOTALL,
 729        )
 730        if not match:
 731            return ""
 732
 733        callback_body = match.group(1)
 734
 735        # Detect how payload is parsed — json.loads(msg.payload) or similar
 736        payload_parse = ""
 737        if "json.loads" in callback_body:
 738            payload_parse = "    # payload is already a dict (parsed by agent.subscribe)\n"
 739
 740        # Strip leading indentation from callback body
 741        lines = callback_body.splitlines()
 742        min_indent = min((len(l) - len(l.lstrip()) for l in lines if l.strip()), default=4)
 743        dedented = "\n".join("    " + l[min_indent:] for l in lines if l.strip())
 744
 745        # Extract any setup code before the aiomqtt block
 746        pre_match = _re.split(r'async\s+with\s+aiomqtt\.Client', code)[0]
 747        pre_lines = [l for l in pre_match.splitlines()
 748                     if l.strip() and not l.strip().startswith("import aiomqtt")
 749                     and not l.strip().startswith("async def setup")]
 750        pre_code = "\n".join("    " + l.strip() for l in pre_lines if l.strip()) + "\n" if pre_lines else ""
 751
 752        rewritten = (
 753            f"async def setup(agent):\n"
 754            f"{pre_code}"
 755            f"    async def _on_message(payload):\n"
 756            f"{payload_parse}"
 757            f"{dedented}\n"
 758            f"    agent.subscribe('{topic}', _on_message)\n"
 759            f"    await agent.log('Subscribed to {topic}')\n"
 760        )
 761
 762        # Preserve any process() or handle_task() that existed
 763        import re as _re2
 764        for fn in ("process", "handle_task"):
 765            fn_match = _re2.search(rf'async\s+def\s+{fn}\s*\(', code)
 766            if fn_match:
 767                rewritten += "\n" + code[fn_match.start():]
 768                break
 769
 770        return rewritten
 771
 772    # ── Plan cache ─────────────────────────────────────────────────────────
 773
 774    def _load_cached_plan(self, cache_key: str, workers: list[dict]) -> Optional[list]:
 775        """Load a cached plan if it exists, is fresh, and all required agents are alive."""
 776        raw = self.recall(_PLAN_CACHE_KEY) or {}
 777        entry = raw.get(cache_key)
 778        if not entry:
 779            return None
 780
 781        # TTL check
 782        age = time.time() - entry.get("timestamp", 0)
 783        if age > _CACHE_TTL_S:
 784            logger.info(f"[{self.name}] Cache expired ({age/3600:.1f}h old)")
 785            return None
 786
 787        plan = entry.get("plan", [])
 788        if not plan:
 789            return None
 790
 791        # Validate all agents in the plan are still running
 792        alive = {w["name"] for w in workers} | {"main", self.name}
 793        for step in plan:
 794            agent = step.get("agent", "")
 795            if agent not in alive and not step.get("spawn_config"):
 796                logger.info(f"[{self.name}] Cache invalid — agent '{agent}' no longer running")
 797                return None
 798
 799        return plan
 800
 801    def _save_plan_cache(self, cache_key: str, task: str, plan: list):
 802        """Persist the plan so future similar tasks can reuse it."""
 803        raw = self.recall(_PLAN_CACHE_KEY) or {}
 804        # Evict entries older than TTL
 805        now = time.time()
 806        raw = {k: v for k, v in raw.items() if now - v.get("timestamp", 0) < _CACHE_TTL_S}
 807        raw[cache_key] = {
 808            "task":      task[:200],
 809            "plan":      plan,
 810            "timestamp": now,
 811        }
 812        self.persist(_PLAN_CACHE_KEY, raw)
 813
 814    # ── Worker discovery ───────────────────────────────────────────────────
 815
 816    def _discover_workers(self) -> list[dict]:
 817        if not self._registry:
 818            return []
 819        # Pull full manifests from main's capability registry (includes schemas)
 820        main = self._registry.find_by_name("main")
 821        manifest_map: dict = {}
 822        if main and hasattr(main, "list_capabilities"):
 823            for cap in main.list_capabilities():
 824                manifest_map[cap["name"]] = cap
 825
 826        workers = []
 827        for actor in self._registry.all_actors():
 828            if actor.name in _SKIP_AGENTS or actor.name == self.name:
 829                continue
 830            # Prefer manifest data (richer), fall back to live actor attrs
 831            manifest = manifest_map.get(actor.name, {})
 832            workers.append({
 833                "name":          actor.name,
 834                "type":          type(actor).__name__,
 835                "description":   (
 836                    manifest.get("description")
 837                    or getattr(actor, "description", "")
 838                    or getattr(actor, "system_prompt", "")[:100]
 839                    or type(actor).__name__
 840                ),
 841                "capabilities":  manifest.get("capabilities", []),
 842                "input_schema":  manifest.get("input_schema",  {}),
 843                "output_schema": manifest.get("output_schema", {}),
 844            })
 845        return workers
 846
 847    # ── Decomposition ──────────────────────────────────────────────────────
 848
 849    async def _decompose(self, task: str, workers: list[dict]) -> list[dict]:
 850        """LLM breaks task into steps. Can declare missing agents with spawn configs."""
 851        if not self.llm:
 852            return []
 853
 854        def _fmt_worker(w: dict) -> str:
 855            lines = [f"  - {w['name']} ({w['type']}): {w['description']}"]
 856            if w.get("capabilities"):
 857                lines.append(f"    capabilities: {', '.join(w['capabilities'])}")
 858            if w.get("input_schema"):
 859                lines.append(f"    input_schema : {w['input_schema']}")
 860            if w.get("output_schema"):
 861                lines.append(f"    output_schema: {w['output_schema']}")
 862            return "\n".join(lines)
 863
 864        workers_desc = "\n".join(_fmt_worker(w) for w in workers)
 865
 866        prompt = f"""You are a task planner for a multi-agent system.
 867Break the task into steps. Each step is handled by one agent.
 868
 869AVAILABLE AGENTS (with input/output contracts):
 870{workers_desc}
 871
 872TASK: {task}
 873
 874OUTPUT RULES:
 875- Respond ONLY with a valid JSON array. No explanation, no markdown.
 876- Each step object:
 877  {{
 878    "step": <int>,
 879    "agent": "<agent-name>",
 880    "task": "<what to ask this agent>",
 881    "parallel": <true|false>,
 882    "depends_on": [<step ints>],
 883    "spawn_config": <null or spawn object if agent needs to be created>
 884  }}
 885- "parallel": true if this step can run concurrently with other parallel steps
 886- "depends_on": step numbers whose results this step needs (empty list if none)
 887- "spawn_config": if the ideal agent for a step does NOT exist in the available list,
 888  include a spawn config to create it.
 889  AGENT TYPE RULES:
 890    Use "llm" ONLY for pure conversation/Q&A/explanation agents (no external APIs or tools).
 891    Use "dynamic" for anything that fetches data, calls APIs, runs searches, or uses libraries.
 892    In dynamic agent code ALWAYS use: await agent.log(msg), await agent.publish(topic, dict), agent.state dict, agent.recall(key), agent.persist(key, val).
 893    NEVER use agent.logger — it does not exist. Use await agent.log(msg) instead.
 894  LLM agent example:
 895  {{
 896    "name": "translator-agent",
 897    "type": "llm",
 898    "system_prompt": "You are an expert translator. Translate text accurately."
 899  }}
 900  Dynamic agent example (for weather, news, search, APIs):
 901  {{
 902    "name": "weather-agent",
 903    "type": "dynamic",
 904    "description": "Fetches live weather data for a city",
 905    "input_schema":  {{"city": "str — city name to fetch weather for"}},
 906    "output_schema": {{"city": "str", "temp_c": "str", "description": "str"}},
 907    "poll_interval": 3600,
 908    "code": "async def setup(agent):\n    await agent.log('ready')\nasync def process(agent):\n    import asyncio\n    await asyncio.sleep(3600)\nasync def handle_task(agent, payload):\n    import httpx\n    city = payload.get('city', 'Athens')\n    async with httpx.AsyncClient(timeout=10) as c:\n        r = await c.get(f'https://wttr.in/{{city}}?format=j1')\n        d = r.json()\n    cur = d['current_condition'][0]\n    return {{'city': city, 'temp_c': cur['temp_C'], 'description': cur['weatherDesc'][0]['value']}}"
 909  }}
 910- The FINAL synthesis step should ALWAYS be assigned to "main" (not any other agent).
 911  Main will combine results using its LLM. Never assign synthesis to a domain agent.
 912- Only create new agents when TRULY necessary — prefer existing agents.
 913- If one agent can handle everything, output a single-step plan.
 914- Keep it minimal — avoid unnecessary steps.
 915- IMPORTANT: For any step that combines, summarizes, synthesizes or compares results
 916  from other steps, ALWAYS use "agent": "main" — never a domain agent.
 917- Domain agents (weather, news, manual, etc.) are for DATA RETRIEVAL only.
 918  "main" handles all reasoning, summarization and synthesis.
 919
 920Example:
 921[
 922  {{"step": 1, "agent": "weather-agent", "task": "Get weather in Athens", "parallel": true, "depends_on": [], "spawn_config": null}},
 923  {{"step": 2, "agent": "news-agent", "task": "Get AI news today", "parallel": true, "depends_on": [], "spawn_config": null}},
 924  {{"step": 3, "agent": "main", "task": "Summarize the weather and news results", "parallel": false, "depends_on": [1, 2], "spawn_config": null}}
 925]"""
 926
 927        try:
 928            response, _ = await self.llm.complete(
 929                messages=[{"role": "user", "content": prompt}],
 930                system="You are a JSON-only task planner. Output only valid JSON arrays, nothing else.",
 931                max_tokens=1500,
 932            )
 933            clean = response.strip()
 934            # Strip markdown fences
 935            if clean.startswith("```"):
 936                clean = "\n".join(clean.split("\n")[1:])
 937            if clean.endswith("```"):
 938                clean = "\n".join(clean.split("\n")[:-1])
 939            plan = json.loads(clean.strip())
 940            if isinstance(plan, list) and plan:
 941                return plan
 942        except Exception as e:
 943            logger.error(f"[{self.name}] Decomposition error: {e}")
 944        return []
 945
 946    # ── Missing agent spawning ─────────────────────────────────────────────
 947
 948    async def _ensure_agents(self, plan: list[dict]) -> list[dict]:
 949        """
 950        For any step with a spawn_config, spawn the agent if it's not running.
 951        Updates the plan with the actual agent name once spawned.
 952        """
 953        if not self._registry:
 954            return plan
 955
 956        for step in plan:
 957            spawn_config = step.get("spawn_config")
 958            if not spawn_config:
 959                continue
 960
 961            agent_name = spawn_config.get("name") or step.get("agent")
 962            existing   = self._registry.find_by_name(agent_name)
 963
 964            if existing:
 965                await self._log(f"Agent '{agent_name}' already running — skipping spawn")
 966                step["agent"] = agent_name
 967                continue
 968
 969            await self._log(f"Spawning missing agent: '{agent_name}'")
 970            try:
 971                actor = await self._spawn_agent(spawn_config)
 972                if actor:
 973                    step["agent"] = agent_name
 974                    self._spawned_by_planner.append(agent_name)
 975                    # Brief pause to let agent initialise
 976                    await asyncio.sleep(1.0)
 977                    await self._log(f"'{agent_name}' ready.")
 978                else:
 979                    await self._log(f"Failed to spawn '{agent_name}' — step will use main as fallback")
 980                    step["agent"] = "main"
 981            except Exception as e:
 982                logger.error(f"[{self.name}] Spawn of '{agent_name}' failed: {e}")
 983                step["agent"] = "main"
 984
 985        return plan
 986
 987    async def _spawn_agent(self, config: dict) -> Optional[Actor]:
 988        """Spawn an agent from a config dict — same logic as MainActor._spawn_from_config."""
 989        agent_type = config.get("type", "dynamic")
 990        name       = config.get("name", "spawned-agent")
 991
 992        if agent_type == "ha_actuator":
 993            from .home_assistant_actuator_agent import (
 994                HomeAssistantActuatorAgent, ActuatorConfig,
 995                ActuatorAction, ActuatorCondition,
 996            )
 997            # Ensure automation_id is unique — append short hash if needed
 998            automation_id = config.get("automation_id", name)
 999            if self._registry and self._registry.find_by_name(f"actuator-{automation_id[:20]}"):
1000                import hashlib
1001                suffix = hashlib.md5(f"{automation_id}{time.time()}".encode()).hexdigest()[:4]
1002                automation_id = f"{automation_id}-{suffix}"
1003                name = f"actuator-{automation_id[:20]}"
1004            actuator_config = ActuatorConfig(
1005                automation_id = automation_id,
1006                description   = config.get("description", ""),
1007                mqtt_topics   = config.get("mqtt_topics", []),
1008                actions       = [ActuatorAction.from_dict(a) for a in config.get("actions", [])],
1009                conditions    = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])],
1010                detection_filter = config.get("detection_filter"),
1011                cooldown_seconds = float(config.get("cooldown_seconds", 10.0)),
1012            )
1013            actor = await self.spawn(
1014                HomeAssistantActuatorAgent,
1015                config=actuator_config,
1016                name=name,
1017                persistence_dir=str(self._persistence_dir.parent),
1018            )
1019            await self._register_with_main(config)
1020            return actor
1021
1022        if agent_type == "llm":
1023            from .llm_agent import LLMAgent
1024            actor = await self.spawn(
1025                LLMAgent,
1026                name=name,
1027                llm_provider=self.llm,
1028                system_prompt=config.get("system_prompt", "You are a helpful assistant."),
1029                persistence_dir=str(self._persistence_dir.parent),
1030            )
1031            # Save to main's spawn registry so it persists across restarts
1032            await self._register_with_main(config)
1033            return actor
1034
1035        if agent_type == "dynamic":
1036            code = config.get("code", "").strip()
1037            if not code:
1038                logger.warning(f"[{self.name}] Dynamic spawn config has no code for '{name}'")
1039                return None
1040            from .dynamic_agent import DynamicAgent
1041            actor = await self.spawn(
1042                DynamicAgent,
1043                name=name,
1044                code=code,
1045                poll_interval=float(config.get("poll_interval") or 1.0),
1046                description=config.get("description", ""),
1047                input_schema=config.get("input_schema", {}),
1048                output_schema=config.get("output_schema", {}),
1049                llm_provider=self.llm,
1050                persistence_dir=str(self._persistence_dir.parent),
1051            )
1052            await self._register_with_main(config)
1053            return actor
1054
1055        if agent_type == "manual":
1056            from .manual_agent import ManualAgent
1057            actor = await self.spawn(
1058                ManualAgent,
1059                name=name,
1060                llm_provider=self.llm,
1061                persistence_dir=str(self._persistence_dir.parent),
1062            )
1063            await self._register_with_main(config)
1064            return actor
1065
1066        logger.warning(f"[{self.name}] Unknown agent type: '{agent_type}'")
1067        return None
1068
1069    async def _register_with_main(self, config: dict):
1070        """Tell main to add this agent to its spawn registry so it survives restarts."""
1071        if not self._registry:
1072            return
1073        main = self._registry.find_by_name("main")
1074        if main and hasattr(main, "_save_to_spawn_registry"):
1075            main._save_to_spawn_registry(config)
1076            logger.info(f"[{self.name}] Registered '{config.get('name')}' with main's spawn registry")
1077
1078    # ── Execution ──────────────────────────────────────────────────────────
1079
1080    async def _execute(self, plan: list[dict]) -> dict:
1081        results:   dict       = {}
1082        completed: set[int]   = set()
1083        remaining: list[dict] = list(plan)
1084
1085        while remaining:
1086            ready = [
1087                s for s in remaining
1088                if all(d in completed for d in (s.get("depends_on") or []))
1089            ]
1090            if not ready:
1091                logger.error(f"[{self.name}] Plan deadlock — aborting remaining steps")
1092                break
1093
1094            parallel   = [s for s in ready if s.get("parallel", False)]
1095            sequential = [s for s in ready if not s.get("parallel", False)]
1096
1097            if parallel:
1098                await self._log(f"Parallel: steps {[s['step'] for s in parallel]}")
1099                outputs = await asyncio.gather(
1100                    *[self._execute_step(s, results) for s in parallel],
1101                    return_exceptions=True,
1102                )
1103                for step, out in zip(parallel, outputs):
1104                    results[step["step"]] = out if not isinstance(out, Exception) else {"error": str(out)}
1105                    completed.add(step["step"])
1106                    remaining.remove(step)
1107
1108            for step in sequential:
1109                await self._log(f"Sequential: step {step['step']} → @{step['agent']}")
1110                results[step["step"]] = await self._execute_step(step, results)
1111                completed.add(step["step"])
1112                remaining.remove(step)
1113
1114        return results
1115
1116    async def _execute_step(self, step: dict, prior: dict) -> dict:
1117        agent_name = step.get("agent", "main")
1118        task_text  = step.get("task", "")
1119        depends_on = step.get("depends_on") or []
1120
1121        # Inject context from prior steps
1122        if depends_on:
1123            ctx = []
1124            for dep in depends_on:
1125                r = prior.get(dep, {})
1126                t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:600]
1127                ctx.append(f"[Step {dep} result]: {t}")
1128            if ctx:
1129                task_text += "\n\nContext from previous steps:\n" + "\n".join(ctx)
1130
1131        if agent_name in ("main", self.name):
1132            return {"result": await self._llm_answer(task_text)}
1133
1134        await self._log(f"  → @{agent_name}: {task_text[:60]}")
1135        result = await self._delegate(agent_name, task_text)
1136        if not result:
1137            return {"error": f"No response from {agent_name}"}
1138        # If agent reported an error, check if we can replan around it
1139        if "error" in result and "error_phase" in result:
1140            await self._log(
1141                f"  ⚠ @{agent_name} failed ({result['error_phase']}): {result['error'][:80]}"
1142            )
1143            # Try main as fallback synthesizer
1144            await self._log(f"  → falling back to @main for this step")
1145            fallback = await self._llm_answer(
1146                f"The agent '{agent_name}' failed. Do your best to answer: {task_text}"
1147            )
1148            return {"result": fallback, "fallback": True, "original_error": result["error"]}
1149        return result
1150
1151    # ── Delegation ─────────────────────────────────────────────────────────
1152
1153    async def _delegate(self, agent_name: str, task: str, timeout: float = 60.0) -> Optional[dict]:
1154        return await self._delegate_with_payload(agent_name, {"text": task}, timeout=timeout)
1155
1156    async def _delegate_with_payload(self, agent_name: str, payload: dict, timeout: float = 60.0) -> Optional[dict]:
1157        if not self._registry:
1158            return None
1159        target = self._registry.find_by_name(agent_name)
1160        if not target:
1161            logger.warning(f"[{self.name}] Agent '{agent_name}' not found for delegation")
1162            return {"error": f"Agent '{agent_name}' not found"}
1163
1164        import uuid
1165        task_id = str(uuid.uuid4())[:8]
1166        future: asyncio.Future = asyncio.get_running_loop().create_future()
1167        self._result_futures[task_id] = future
1168
1169        await self.send(target.actor_id, MessageType.TASK, {
1170            **payload, "_task_id": task_id, "_reply_to": self.actor_id
1171        })
1172        try:
1173            return await asyncio.wait_for(future, timeout=timeout)
1174        except asyncio.TimeoutError:
1175            logger.warning(f"[{self.name}] Timeout from '{agent_name}'")
1176            return {"error": f"Timeout from {agent_name}"}
1177        finally:
1178            self._result_futures.pop(task_id, None)
1179
1180    # ── Synthesis ──────────────────────────────────────────────────────────
1181
1182    async def _synthesize(self, task: str, plan: list[dict], results: dict) -> str:
1183        if not self.llm:
1184            parts = []
1185            for s in plan:
1186                r = results.get(s["step"], {})
1187                t = r.get("result") or r.get("text") or r.get("answer") or str(r)
1188                parts.append(f"[@{s['agent']}]: {t}")
1189            return "\n\n".join(parts)
1190
1191        results_text = []
1192        for s in plan:
1193            r = results.get(s["step"], {})
1194            t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:800]
1195            results_text.append(f"Step {s['step']} (@{s['agent']}): {t}")
1196
1197        prompt = (
1198            f"You collected results from multiple agents for this task:\n\n"
1199            f"ORIGINAL TASK: {task}\n\n"
1200            f"RESULTS:\n" + "\n\n".join(results_text) +
1201            "\n\nSynthesize into a single, clear, well-structured answer for the user. "
1202            "Do not mention agent names, step numbers, or internal system details."
1203        )
1204        try:
1205            response, _ = await self.llm.complete(
1206                messages=[{"role": "user", "content": prompt}],
1207                system="You synthesize multi-agent results into clean, user-facing answers.",
1208                max_tokens=2048,
1209            )
1210            return response
1211        except Exception as e:
1212            logger.error(f"[{self.name}] Synthesis failed: {e}")
1213            return "\n\n".join(results_text)
1214
1215    async def _llm_answer(self, task: str) -> str:
1216        if not self.llm:
1217            return f"[No LLM available: {task}]"
1218        try:
1219            response, _ = await self.llm.complete(
1220                messages=[{"role": "user", "content": task}],
1221                system="You are a helpful assistant.",
1222                max_tokens=2048,
1223            )
1224            return response
1225        except Exception as e:
1226            return f"[LLM error: {e}]"
1227
1228    # ── Helpers ────────────────────────────────────────────────────────────
1229
1230    async def _deferred_stop(self):
1231        await asyncio.sleep(2.0)
1232        await self._log("Self-terminating.")
1233        if self._registry:
1234            await self._registry.unregister(self.actor_id)
1235        await self.stop()
1236
1237    async def _log(self, msg: str):
1238        logger.info(f"[{self.name}] {msg}")
1239        await self._mqtt_publish(
1240            f"agents/{self.actor_id}/logs",
1241            {"type": "log", "message": msg, "timestamp": time.time()},
1242        )

On-demand orchestrator. Spawned per complex task, self-terminates when done.

PlannerAgent(llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, task: str = '', reply_to_id: str = '', reply_task_id: str = '', auto_terminate: bool = True, **kwargs)
42    def __init__(
43        self,
44        llm_provider:   Optional[LLMProvider] = None,
45        task:           str = "",
46        reply_to_id:    str = "",
47        reply_task_id:  str = "",
48        auto_terminate: bool = True,
49        **kwargs,
50    ):
51        kwargs.setdefault("name", "planner")
52        super().__init__(**kwargs)
53        self.llm              = llm_provider
54        self._task            = task
55        self._reply_to_id     = reply_to_id
56        self._reply_task_id   = reply_task_id
57        self._auto_terminate  = auto_terminate
58        self._result_futures: dict[str, asyncio.Future] = {}
59        self._spawned_by_planner: list[str] = []   # agents we created this run
llm
async def on_start(self):
66    async def on_start(self):
67        await self._log(f"Planner ready. Task: {self._task[:80]}")
68        if self._task:
69            asyncio.create_task(self._report_plan(self._task))

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
73    async def handle_message(self, msg: Message):
74        if msg.type == MessageType.TASK:
75            payload   = msg.payload if isinstance(msg.payload, dict) else {"text": str(msg.payload)}
76            task_text = payload.get("text") or payload.get("task") or str(msg.payload)
77            self._reply_to_id = payload.get("_reply_to") or msg.reply_to or msg.sender_id or self._reply_to_id
78            task_id           = payload.get("_task_id")
79            await self._log(f"Received task: {task_text[:80]}")
80            result = await self._run_plan(task_text)
81            if self._reply_to_id:
82                # Use the initiating task_id (from main) so the future resolves,
83                # falling back to the message-level task_id if present
84                resolve_id = self._reply_task_id or task_id
85                reply = {"result": result, "text": result}
86                if resolve_id:
87                    reply["_task_id"] = resolve_id
88                if self._spawned_by_planner:
89                    reply["spawned"] = self._spawned_by_planner
90                await self.send(self._reply_to_id, MessageType.RESULT, reply)
91
92        elif msg.type == MessageType.RESULT:
93            payload = msg.payload if isinstance(msg.payload, dict) else {}
94            task_id = payload.get("_task_id")
95            if task_id and task_id in self._result_futures:
96                fut = self._result_futures[task_id]
97                if not fut.done():
98                    fut.set_result(payload)

Handle messages not caught by default handlers.

class DynamicAgent(wactorz.Actor):
 33class DynamicAgent(Actor):
 34    """
 35    Generic actor shell. Core behavior is provided as Python source code strings.
 36    The LLM writes setup/process/handle_task functions; this class runs them.
 37    """
 38
 39    def __init__(
 40        self,
 41        code: str,                          # LLM-generated Python source
 42        poll_interval: float = 1.0,         # seconds between process() calls
 43        description: str = "",              # what this agent does
 44        input_schema: dict = None,          # expected task payload fields
 45        output_schema: dict = None,         # returned result fields
 46        llm_provider=None,                  # optional LLM for agent.llm.chat()
 47        **kwargs,
 48    ):
 49        super().__init__(**kwargs)
 50        self._code           = code
 51        self.poll_interval   = poll_interval
 52        self.description     = description
 53        self.input_schema    = input_schema  or {}
 54        self.output_schema   = output_schema or {}
 55        self._llm_provider   = llm_provider
 56
 57        # Compiled functions — populated in on_start
 58        self._fn_setup       = None
 59        self._fn_process     = None
 60        self._fn_handle_task = None
 61
 62        # Namespace shared across all calls (agent can store state here)
 63        self._ns: dict       = {}
 64
 65        # Cost tracking (populated by _LLMInterface if LLM is used)
 66        self.total_input_tokens  = 0
 67        self.total_output_tokens = 0
 68        self.total_cost_usd      = 0.0
 69
 70        # Error tracking for health classification
 71        self._consecutive_errors: int   = 0
 72        self._error_threshold:    int   = 3      # DEGRADED after this many
 73        self._last_error_time:    float = 0.0
 74        self._error_phase:        str   = ""     # compile|setup|process|handle_task
 75
 76        # Public API exposed to generated code via `agent` parameter
 77        self._api            = _AgentAPI(self)
 78
 79    # ── Lifecycle ──────────────────────────────────────────────────────────
 80
 81    async def on_start(self):
 82        # ── Compile with LLM self-correction on syntax errors ─────────────
 83        current_code = self._code
 84        error_msg    = self._compile_code(current_code)
 85
 86        if error_msg:
 87            for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
 88                logger.warning(
 89                    f"[{self.name}] Compile error (attempt {attempt}): {error_msg}"
 90                )
 91                fixed = await self._fix_syntax_with_llm(current_code, error_msg)
 92                if fixed is None:
 93                    # LLM unavailable — no point retrying
 94                    break
 95                self._ns = {}                      # fresh namespace for retry
 96                new_err = self._compile_code(fixed)
 97                if new_err is None:
 98                    # Fix worked — update stored code so restarts use the good version
 99                    self._code = fixed
100                    error_msg  = None
101                    logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
102                    await self._mqtt_publish(
103                        f"agents/{self.actor_id}/logs",
104                        {"type": "log",
105                         "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
106                         "timestamp": time.time()},
107                    )
108                    break
109                # Fix compiled but still broken — feed it back for the next attempt
110                current_code = fixed
111                error_msg    = new_err
112
113        if error_msg:
114            # All attempts exhausted — publish fatal and stop
115            err_exc = SyntaxError(error_msg)
116            logger.error(f"[{self.name}] Code compilation failed permanently: {error_msg}")
117            await self._publish_error(phase="compile", error=err_exc,
118                                      traceback_str=error_msg, fatal=True)
119            return
120
121        # ── setup() ───────────────────────────────────────────────────────
122        if self._fn_setup:
123            # Run setup as a background task so long-running loops (e.g. aiomqtt
124            # subscriptions) don't block on_start() and prevent heartbeats from firing.
125            self._tasks.append(asyncio.create_task(self._run_setup()))
126        else:
127            if self._fn_process:
128                self._tasks.append(asyncio.create_task(self._process_loop()))
129
130        # Publish manifest immediately so main's registry knows this agent exists
131        # even if it never calls publish() (pure handle_task agents, etc.)
132        await self._api._publish_manifest()
133
134    async def on_stop(self):
135        # Give generated code a chance to clean up
136        cleanup = self._ns.get("cleanup")
137        if cleanup:
138            try:
139                await cleanup(self._api)
140            except Exception:
141                pass
142
143    # ── Code compilation ───────────────────────────────────────────────────
144
145    @staticmethod
146    def _sanitize_code(code: str) -> str:
147        """
148        Block-aware sanitizer. Removes LLM self-setup patterns entirely:
149        - try/except blocks containing LLM imports
150        - if/else blocks checking api_key or llm_backend
151        - orphan else:/elif: that follow sanitized blocks
152        - call_llm/call_openai/call_ollama functions -> agent.llm shim
153        - standalone bad lines
154        """
155        import re
156
157        LLM_PATTERNS = [
158            r"\bimport\s+(openai|anthropic|ollama|langchain)\b",
159            r"\bfrom\s+(openai|anthropic|ollama|langchain)\b",
160            r"\b(OPENAI_API_KEY|ANTHROPIC_API_KEY)\b",
161            r"os\.environ.*API_KEY",
162            r"\b(openai|anthropic|ollama)\.(OpenAI|Anthropic|Client|AsyncOpenAI|AsyncAnthropic)\b",
163            # api_key as a variable assignment (not as a dict key like 'api_key': ...)
164            r"^\s*api_key\s*=",
165            # llm_backend as a variable assignment only
166            r"^\s*agent\.state\[.llm_backend.\]\s*=",
167        ]
168
169        def line_is_bad(line):
170            return any(re.search(p, line) for p in LLM_PATTERNS)
171
172        def collect_block(lines, start, base_indent, conts=("except","else","finally","elif")):
173            j, block = start, []
174            pat = r"\s*(" + "|".join(conts) + r")\b" if conts else r"(?!x)x"
175            while j < len(lines):
176                bl = lines[j]
177                bl_ind = len(bl) - len(bl.lstrip()) if bl.strip() else base_indent + 4
178                if bl.strip() and bl_ind <= base_indent and not re.match(pat, bl):
179                    break
180                block.append(bl)
181                j += 1
182            return block, j
183
184        lines  = code.split("\n")
185        result = []
186        i      = 0
187        last_sanitized = False
188
189        while i < len(lines):
190            line     = lines[i]
191            stripped = line.strip()
192            indent   = len(line) - len(line.lstrip()) if stripped else 0
193            prefix   = " " * indent
194
195            if not stripped:
196                result.append(line)
197                last_sanitized = False
198                i += 1
199                continue
200
201            # try: blocks — nuke entirely if they touch LLM
202            if stripped == "try:":
203                block, j = collect_block(lines, i + 1, indent)
204                full = [line] + block
205                if any(line_is_bad(l) for l in full):
206                    result.append(prefix + "pass  # sanitized: LLM setup block")
207                    last_sanitized = True
208                else:
209                    result.extend(full)
210                    last_sanitized = False
211                i = j
212                continue
213
214            # if/elif whose condition references LLM vars — nuke whole branch
215            if re.match(r"\s*(if|elif)\b", line) and line_is_bad(line):
216                _, j = collect_block(lines, i + 1, indent, ("elif", "else"))
217                result.append(prefix + "pass  # sanitized: LLM conditional")
218                last_sanitized = True
219                i = j
220                continue
221
222            # orphan else:/elif: after a sanitized block — drop silently
223            if re.match(r"\s*(else\s*:|elif\b)", line) and last_sanitized:
224                _, j = collect_block(lines, i + 1, indent, ())
225                i = j
226                continue
227
228            # LLM wrapper functions — replace with agent.llm shim
229            fn_m = re.match(
230                r"(\s*)(async\s+)?def\s+"
231                r"(call_llm|call_openai|call_ollama|call_anthropic|call_gpt|"
232                r"get_llm|setup_llm|create_llm|query_llm|ask_llm|llm_call)\s*\(",
233                line,
234            )
235            if fn_m:
236                _, j = collect_block(lines, i + 1, len(fn_m.group(1)), ())
237                p, fname = fn_m.group(1), fn_m.group(3)
238                result += [
239                    p + "async def " + fname + "(agent, messages, system='', **kw):",
240                    p + "    # sanitized: rewired to agent.llm",
241                    p + "    sys_p = system or next((m.get('content','') for m in messages if m.get('role')=='system'), '')",
242                    p + "    msgs  = [m for m in messages if m.get('role') != 'system']",
243                    p + "    return await agent.llm.complete(messages=msgs, system=sys_p)",
244                ]
245                last_sanitized = False
246                i = j
247                continue
248
249            # standalone bad lines
250            if line_is_bad(line):
251                result.append(prefix + "pass  # sanitized: " + stripped[:60])
252                last_sanitized = True
253                i += 1
254                continue
255
256            last_sanitized = False
257            result.append(line)
258            i += 1
259
260        return "\n".join(result)
261
262
263
264
    # Upper bound on how many LLM repair rounds on_start() attempts for code
    # that fails to compile, before the failure is reported as fatal.
    _MAX_COMPILE_RETRIES = 2
267
268    def _compile_code(self, code: Optional[str] = None) -> Optional[str]:
269        """
270        Sanitize then compile LLM-generated code into self._ns.
271
272        Returns the error message string if compilation fails, None on success.
273        Callers use the error string to ask the LLM to fix the code and retry
274        (see on_start / _fix_syntax_with_llm).
275        """
276        source = code if code is not None else self._code
277        clean  = self._sanitize_code(source)
278
279        # Pre-inject the LLM shim so generated code can call agent.llm directly
280        def _get_llm_shim(*args, **kwargs):
281            return self._api.llm
282        self._ns["get_llm"]    = _get_llm_shim
283        self._ns["setup_llm"]  = _get_llm_shim
284        self._ns["create_llm"] = _get_llm_shim
285
286        try:
287            exec(compile(clean, f"<{self.name}>", "exec"), self._ns)
288            self._fn_setup       = self._ns.get("setup")
289            self._fn_process     = self._ns.get("process")
290            self._fn_handle_task = self._ns.get("handle_task")
291            fns = [f for f in ["setup", "process", "handle_task", "cleanup"] if f in self._ns]
292            logger.info(f"[{self.name}] Code compiled OK. Functions: {fns}")
293            if not fns:
294                logger.warning(f"[{self.name}] No functions found in compiled code.")
295            return None   # success
296        except Exception as e:
297            return f"{type(e).__name__}: {e}"
298
299    async def _fix_syntax_with_llm(self, bad_code: str, error_msg: str) -> Optional[str]:
300        """
301        Ask the configured LLM to fix a syntax error in agent code.
302
303        Returns the (possibly still-broken) code string from the LLM, or None
304        only if the LLM is completely unavailable (no provider, API error).
305        The caller is responsible for verifying the fix with _compile_code().
306        """
307        if self._llm_provider is None:
308            return None
309
310        prompt = (
311            "The following Python code has a syntax error.\n"
312            f"Error: {error_msg}\n\n"
313            "Fix ONLY the syntax error. Do not change logic or add features.\n"
314            "Return ONLY the corrected Python code — no explanations, "
315            "no markdown fences, no commentary.\n\n"
316            f"```python\n{bad_code}\n```"
317        )
318        logger.info(f"[{self.name}] Asking LLM to fix syntax error: {error_msg[:120]}")
319        await self._mqtt_publish(
320            f"agents/{self.actor_id}/logs",
321            {"type": "log",
322             "message": f"Syntax error — asking LLM to fix: {error_msg[:120]}",
323             "timestamp": time.time()},
324        )
325        try:
326            response, usage = await self._llm_provider.complete(
327                messages=[{"role": "user", "content": prompt}],
328                system="You are a Python syntax expert. Return only valid Python code.",
329                max_tokens=4096,
330            )
331            # Track cost
332            if hasattr(self, "total_input_tokens"):
333                self.total_input_tokens  += usage.get("input_tokens", 0)
334                self.total_output_tokens += usage.get("output_tokens", 0)
335                self.total_cost_usd      += usage.get("cost_usd", 0.0)
336
337            # Strip markdown fences the LLM may add despite instructions
338            fixed = response.strip()
339            if fixed.startswith("```"):
340                fixed = "\n".join(
341                    l for l in fixed.split("\n")
342                    if not l.strip().startswith("```")
343                ).strip()
344
345            return fixed   # caller validates with _compile_code()
346
347        except Exception as e:
348            logger.warning(f"[{self.name}] LLM fix call failed: {e}")
349            return None    # only None when LLM is truly unreachable
350
351    # ── Setup wrapper ───────────────────────────────────────────────────────
352
353    async def _run_setup(self):
354        """
355        Run setup() as a background task.
356        - Errors in setup() are published as fatal errors (agent won't restart).
357        - If process() is also defined, it is started AFTER setup() returns.
358          For agents whose setup() never returns (e.g. aiomqtt subscription loops),
359          process() is simply not started — the subscription loop IS the process.
360        """
361        try:
362            await self._fn_setup(self._api)
363            logger.info(f"[{self.name}] setup() completed.")
364        except asyncio.CancelledError:
365            return
366        except Exception as e:
367            err = traceback.format_exc()
368            logger.error(f"[{self.name}] setup() failed: {e}\n{err}")
369            await self._publish_error(phase="setup", error=e, traceback_str=err, fatal=True)
370            return
371        # setup() returned cleanly — start process() loop if defined
372        if self._fn_process and self.state not in (ActorState.STOPPED, ActorState.FAILED):
373            self._tasks.append(asyncio.create_task(self._process_loop()))
374
375    # ── Process loop ───────────────────────────────────────────────────────
376
377    async def _process_loop(self):
378        """Continuously call the generated process() function."""
379        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
380            if self.state == ActorState.PAUSED:
381                await asyncio.sleep(self.poll_interval)
382                continue
383            try:
384                await self._fn_process(self._api)
385                self._reset_error_count()
386            except asyncio.CancelledError:
387                break
388            except Exception as e:
389                self.metrics.errors += 1
390                tb = traceback.format_exc()
391                logger.error(f"[{self.name}] process() error: {e}\n{tb}")
392                await self._publish_error(phase="process", error=e, traceback_str=tb)
393                backoff = min(2 ** self._consecutive_errors, 30)
394                await asyncio.sleep(backoff)
395            await asyncio.sleep(self.poll_interval)
396
397    # ── Message handling ───────────────────────────────────────────────────
398
399    async def handle_message(self, msg: Message):
400        if msg.type == MessageType.TASK:
401            self.metrics.messages_processed += 1
402            if self._fn_handle_task:
403                try:
404                    result = await self._fn_handle_task(self._api, msg.payload or {})
405                    if msg.sender_id and result is not None:
406                        await self.send(msg.sender_id, MessageType.RESULT, result)
407                except Exception as e:
408                    tb = traceback.format_exc()
409                    logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
410                    await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
411                    if msg.sender_id:
412                        await self.send(msg.sender_id, MessageType.RESULT, {
413                            "error":       str(e),
414                            "error_phase": "handle_task",
415                            "agent":       self.name,
416                        })
417            else:
418                if msg.sender_id:
419                    await self.send(msg.sender_id, MessageType.RESULT,
420                                    {"info": f"{self.name} has no handle_task defined"})
421
422    async def _publish_error(
423        self,
424        phase: str,
425        error: Exception,
426        traceback_str: str = "",
427        fatal: bool = False,
428    ):
429        """
430        Publish a structured error event to agents/{id}/errors AND send
431        a direct actor message to MonitorAgent so it works without MQTT.
432        """
433        self._consecutive_errors += 1
434        self._last_error_time     = time.time()
435        self._error_phase         = phase
436        severity = (
437            "critical"
438            if fatal or self._consecutive_errors >= self._error_threshold
439            else "warning"
440        )
441        event = {
442            "actor_id":    self.actor_id,
443            "name":        self.name,
444            "phase":       phase,
445            "error":       str(error),
446            "traceback":   traceback_str[-1200:] if traceback_str else "",
447            "consecutive": self._consecutive_errors,
448            "fatal":       fatal,
449            "severity":    severity,
450            "degraded":    self._consecutive_errors >= self._error_threshold,
451            "timestamp":   time.time(),
452        }
453        await self._mqtt_publish(f"agents/{self.actor_id}/errors", event)
454        # Direct actor message to monitor (works without MQTT broker)
455        if self._registry:
456            monitor = self._registry.find_by_name("monitor")
457            if monitor and monitor.actor_id != self.actor_id:
458                try:
459                    await self.send(monitor.actor_id, MessageType.TASK, {
460                        **event,
461                        "_monitor_error_event": True,
462                    })
463                except Exception:
464                    pass
465        # Mirror to /alert so the dashboard picks it up immediately
466        await self._mqtt_publish(f"agents/{self.actor_id}/alert", {
467            "actor_id":  self.actor_id,
468            "name":      self.name,
469            "message":   f"[{phase}] {error}",
470            "severity":  severity,
471            "timestamp": time.time(),
472        })
473
474    def _reset_error_count(self):
475        if self._consecutive_errors > 0:
476            logger.info(f"[{self.name}] Recovered — resetting error counter.")
477            self._consecutive_errors = 0
478            self._error_phase        = ""
479
480    def get_status(self) -> dict:
481        s = super().get_status()
482        s["description"] = self.description
483        s["code"]        = self._code
484        s["agent_type"]  = "dynamic"
485        return s
486
487    def _build_heartbeat(self) -> dict:
488        hb = super()._build_heartbeat()
489        hb["code"]        = self._code      # include code in every heartbeat
490        hb["description"] = self.description
491        hb["agent_type"]  = "dynamic"
492        return hb
493
494    def _current_task_description(self) -> str:
495        return self.description or "running dynamic code"

Generic actor shell. Core behavior is provided as Python source code strings. The LLM writes setup/process/handle_task functions; this class runs them.

DynamicAgent( code: str, poll_interval: float = 1.0, description: str = '', input_schema: dict = None, output_schema: dict = None, llm_provider=None, **kwargs)
39    def __init__(
40        self,
41        code: str,                          # LLM-generated Python source
42        poll_interval: float = 1.0,         # seconds between process() calls
43        description: str = "",              # what this agent does
44        input_schema: dict = None,          # expected task payload fields
45        output_schema: dict = None,         # returned result fields
46        llm_provider=None,                  # optional LLM for agent.llm.chat()
47        **kwargs,
48    ):
49        super().__init__(**kwargs)
50        self._code           = code
51        self.poll_interval   = poll_interval
52        self.description     = description
53        self.input_schema    = input_schema  or {}
54        self.output_schema   = output_schema or {}
55        self._llm_provider   = llm_provider
56
57        # Compiled functions — populated in on_start
58        self._fn_setup       = None
59        self._fn_process     = None
60        self._fn_handle_task = None
61
62        # Namespace shared across all calls (agent can store state here)
63        self._ns: dict       = {}
64
65        # Cost tracking (populated by _LLMInterface if LLM is used)
66        self.total_input_tokens  = 0
67        self.total_output_tokens = 0
68        self.total_cost_usd      = 0.0
69
70        # Error tracking for health classification
71        self._consecutive_errors: int   = 0
72        self._error_threshold:    int   = 3      # DEGRADED after this many
73        self._last_error_time:    float = 0.0
74        self._error_phase:        str   = ""     # compile|setup|process|handle_task
75
76        # Public API exposed to generated code via `agent` parameter
77        self._api            = _AgentAPI(self)
poll_interval
description
input_schema
output_schema
total_input_tokens
total_output_tokens
total_cost_usd
async def on_start(self):
 81    async def on_start(self):
 82        # ── Compile with LLM self-correction on syntax errors ─────────────
 83        current_code = self._code
 84        error_msg    = self._compile_code(current_code)
 85
 86        if error_msg:
 87            for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
 88                logger.warning(
 89                    f"[{self.name}] Compile error (attempt {attempt}): {error_msg}"
 90                )
 91                fixed = await self._fix_syntax_with_llm(current_code, error_msg)
 92                if fixed is None:
 93                    # LLM unavailable — no point retrying
 94                    break
 95                self._ns = {}                      # fresh namespace for retry
 96                new_err = self._compile_code(fixed)
 97                if new_err is None:
 98                    # Fix worked — update stored code so restarts use the good version
 99                    self._code = fixed
100                    error_msg  = None
101                    logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
102                    await self._mqtt_publish(
103                        f"agents/{self.actor_id}/logs",
104                        {"type": "log",
105                         "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
106                         "timestamp": time.time()},
107                    )
108                    break
109                # Fix compiled but still broken — feed it back for the next attempt
110                current_code = fixed
111                error_msg    = new_err
112
113        if error_msg:
114            # All attempts exhausted — publish fatal and stop
115            err_exc = SyntaxError(error_msg)
116            logger.error(f"[{self.name}] Code compilation failed permanently: {error_msg}")
117            await self._publish_error(phase="compile", error=err_exc,
118                                      traceback_str=error_msg, fatal=True)
119            return
120
121        # ── setup() ───────────────────────────────────────────────────────
122        if self._fn_setup:
123            # Run setup as a background task so long-running loops (e.g. aiomqtt
124            # subscriptions) don't block on_start() and prevent heartbeats from firing.
125            self._tasks.append(asyncio.create_task(self._run_setup()))
126        else:
127            if self._fn_process:
128                self._tasks.append(asyncio.create_task(self._process_loop()))
129
130        # Publish manifest immediately so main's registry knows this agent exists
131        # even if it never calls publish() (pure handle_task agents, etc.)
132        await self._api._publish_manifest()

Called when actor starts. Override for init logic.

async def on_stop(self):
134    async def on_stop(self):
135        # Give generated code a chance to clean up
136        cleanup = self._ns.get("cleanup")
137        if cleanup:
138            try:
139                await cleanup(self._api)
140            except Exception:
141                pass

Called when actor stops. Override for cleanup.

async def handle_message(self, msg: Message):
399    async def handle_message(self, msg: Message):
400        if msg.type == MessageType.TASK:
401            self.metrics.messages_processed += 1
402            if self._fn_handle_task:
403                try:
404                    result = await self._fn_handle_task(self._api, msg.payload or {})
405                    if msg.sender_id and result is not None:
406                        await self.send(msg.sender_id, MessageType.RESULT, result)
407                except Exception as e:
408                    tb = traceback.format_exc()
409                    logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
410                    await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
411                    if msg.sender_id:
412                        await self.send(msg.sender_id, MessageType.RESULT, {
413                            "error":       str(e),
414                            "error_phase": "handle_task",
415                            "agent":       self.name,
416                        })
417            else:
418                if msg.sender_id:
419                    await self.send(msg.sender_id, MessageType.RESULT,
420                                    {"info": f"{self.name} has no handle_task defined"})

Handle messages not caught by default handlers.

def get_status(self) -> dict:
480    def get_status(self) -> dict:
481        s = super().get_status()
482        s["description"] = self.description
483        s["code"]        = self._code
484        s["agent_type"]  = "dynamic"
485        return s
class InstallerAgent(wactorz.Actor):
 74class InstallerAgent(Actor):
 75    """
 76    Pre-defined agent that installs Python packages on demand.
 77    Uses sys.executable so packages are installed into the active venv.
 78    """
 79
 def __init__(self, **kwargs):
     """Create the installer actor, defaulting its name to ``"installer"``."""
     if "name" not in kwargs:
         kwargs["name"] = "installer"
     super().__init__(**kwargs)
     self.protected = True
     # One record per pip attempt, appended by _install_packages().
     self._install_log: list[dict] = []
 85
 86    def _current_task_description(self) -> str:
 87        return "idle"
 88
 async def on_start(self):
     """Announce the interpreter in use, then publish the installer manifest."""
     interpreter = sys.executable
     logger.info(f"[{self.name}] Installer ready — using: {interpreter}")

     log_topic = f"agents/{self.actor_id}/logs"
     startup_event = {
         "type": "log",
         "message": f"Installer ready ({interpreter})",
         "timestamp": time.time(),
     }
     await self._mqtt_publish(log_topic, startup_event)

     await self.publish_manifest(
         description="Installs Python packages on demand via pip",
         capabilities=["pip_install", "package_management"],
     )
 99
async def handle_message(self, msg: Message):
    """Process a TASK message and send its result back to the requester.

    Messages of any other type are ignored. The result goes to
    ``msg.reply_to`` when set, otherwise to ``msg.sender_id``.
    """
    if msg.type != MessageType.TASK:
        return

    reply = await self._handle_install(msg)

    # Echo the task id back so the caller's pending future can resolve.
    request = msg.payload
    if isinstance(request, dict):
        correlation_id = request.get("task") or request.get("_task_id")
        if correlation_id:
            reply.update(task=correlation_id, _task_id=correlation_id)

    recipient = msg.reply_to or msg.sender_id
    if recipient:
        await self.send(recipient, MessageType.RESULT, reply)
112
113    async def _handle_install(self, msg: Message) -> dict:
114        payload = msg.payload if isinstance(msg.payload, dict) else {}
115        action  = payload.get("action", "install")
116
117        if action == "install":
118            packages = payload.get("packages", [])
119            if isinstance(packages, str):
120                packages = [p.strip() for p in packages.replace(",", " ").split()]
121            return await self._install_packages(packages)
122
123        if action == "check":
124            packages = payload.get("packages", [])
125            if isinstance(packages, str):
126                packages = [p.strip() for p in packages.replace(",", " ").split()]
127            return self._check_packages(packages)
128
129        if action == "resolve":
130            return self._resolve_imports(payload.get("imports", []))
131
132        if action == "history":
133            return {"history": self._install_log[-20:]}
134
135        if action == "node_install":
136            # Install packages on a remote node via SSH
137            # payload: {host, user, packages, password (opt), key_path (opt)}
138            return await self._node_install(payload)
139
140        if action == "node_deploy":
141            # Full bootstrap: copy remote_runner.py + install deps + start runner
142            # payload: {host, user, node_name, broker, password (opt), key_path (opt)}
143            return await self._node_deploy(payload)
144
145        if action == "node_run":
146            # Run an arbitrary command on a remote node via SSH
147            # payload: {host, user, command, password (opt), key_path (opt)}
148            return await self._node_run(payload)
149
150        return {"error": f"Unknown action: {action}"}
151
152    # ── Core install logic ──────────────────────────────────────────────────
153
async def _install_packages(self, packages: list[str]) -> dict:
    """Install each requested package with pip and summarise the outcome.

    Names are mapped through ``IMPORT_TO_PACKAGE`` (import name to pip name),
    so callers may pass either form. Packages that already import are
    skipped. Every pip attempt is appended to ``self._install_log`` and
    reported on this agent's MQTT log topic.

    Returns:
        ``{"error": ...}`` when ``packages`` is empty; otherwise a dict with
        ``results`` (pip name to status), ``failed`` (pip names that did not
        install), ``success`` (True when nothing failed) and ``message``.
    """
    if not packages:
        return {"error": "No packages specified"}

    async def publish_log(text):
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": text, "timestamp": time.time()},
        )

    # pip name -> (alternative to try when it fails, log line announcing it).
    # duckduckgo-search was renamed to ddgs in v9, so each is tried for the
    # other; pdfplumber sometimes fails on Windows, so pymupdf is tried next.
    fallbacks = {
        "duckduckgo-search": ("ddgs", "Trying alternative name: ddgs"),
        "ddgs": ("duckduckgo-search", "Trying alternative name: duckduckgo-search"),
        "pdfplumber": ("pymupdf", "pdfplumber failed, trying pymupdf as fallback..."),
    }

    outcomes = {}
    failures = []

    for requested in packages:
        requested = requested.strip()
        if not requested:
            continue

        pip_name = IMPORT_TO_PACKAGE.get(requested, requested)
        module_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)

        if self._is_installed(module_name):
            logger.info(f"[{self.name}] {pip_name} already installed.")
            outcomes[pip_name] = "already_installed"
            continue

        logger.info(f"[{self.name}] Installing {pip_name} into {sys.executable}...")
        await publish_log(f"Installing {pip_name}...")

        ok, pip_output = await self._pip_install(pip_name)

        if not ok and pip_name in fallbacks:
            alternative, notice = fallbacks[pip_name]
            logger.info(f"[{self.name}] {notice}")
            ok, pip_output = await self._pip_install(alternative)
            if ok:
                # Report under the name that actually got installed.
                pip_name = alternative

        if ok:
            outcomes[pip_name] = "installed"
            status = f"✓ {pip_name} installed"
        else:
            outcomes[pip_name] = f"failed: {pip_output[-300:]}"
            failures.append(pip_name)
            # Show the tail of pip's output so failures are diagnosable.
            tail = pip_output[-400:].strip().replace("\n", " | ")
            status = f"✗ {pip_name} FAILED: {tail}"

        self._install_log.append({
            "package":   pip_name,
            "success":   ok,
            "timestamp": time.time(),
            "output":    pip_output[-500:],
        })

        logger.info(f"[{self.name}] {status}")
        await publish_log(status)

    total = len(outcomes)
    return {
        "results": outcomes,
        "failed":  failures,
        "success": len(failures) == 0,
        "message": f"Installed {total - len(failures)}/{total} packages",
    }
228
229    async def _pip_install(self, package: str) -> tuple[bool, str]:
230        """Run pip install using the same interpreter that launched this process.
231
232        sys.executable inside a venv points to  venv/Scripts/python.exe  (Windows)
233        or  venv/bin/python  (Linux/Mac), so packages always land in the right place.
234
235        Uses subprocess.run() in a thread executor instead of asyncio.create_subprocess_exec()
236        because asyncio subprocesses are unreliable on Windows with SelectorEventLoop
237        (the default in some Python versions / environments). subprocess.run() works
238        correctly on all platforms.
239        """
240        import subprocess
241
242        cmd = [sys.executable, "-m", "pip", "install", package, "--quiet"]
243        if sys.platform != "win32":
244            cmd.append("--break-system-packages")
245
246        def _run_pip() -> tuple[bool, str]:
247            try:
248                result = subprocess.run(
249                    cmd,
250                    stdout=subprocess.PIPE,
251                    stderr=subprocess.PIPE,
252                    timeout=180,
253                )
254                output = (result.stdout + result.stderr).decode("utf-8", errors="replace")
255                return result.returncode == 0, output
256            except subprocess.TimeoutExpired:
257                return False, "pip timed out after 180s"
258            except FileNotFoundError:
259                return False, f"Python executable not found: {sys.executable}"
260            except Exception as e:
261                return False, f"{type(e).__name__}: {e}"
262
263        try:
264            loop    = asyncio.get_event_loop()
265            success, output = await loop.run_in_executor(None, _run_pip)
266
267            if success:
268                # Refresh import machinery so the new package is visible immediately
269                importlib.invalidate_caches()
270
271            return success, output
272
273        except Exception as e:
274            return False, f"Executor error: {type(e).__name__}: {e}"
275
276    def _is_installed(self, import_name: str) -> bool:
277        """Check importability, always refreshing the import cache first."""
278        importlib.invalidate_caches()
279        try:
280            importlib.import_module(import_name)
281            return True
282        except ImportError:
283            return False
284
285    # ── Helper actions ──────────────────────────────────────────────────────
286
def _check_packages(self, packages: list[str]) -> dict:
    """Report, per requested name, whether it is ``"installed"`` or ``"missing"``.

    Each name is mapped to its pip name and then to the module name to
    import. The result is keyed by the name exactly as the caller gave it.
    """
    def state_of(requested):
        pip_name = IMPORT_TO_PACKAGE.get(requested, requested)
        module_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)
        if self._is_installed(module_name):
            return "installed"
        return "missing"

    return {"status": {requested: state_of(requested) for requested in packages}}
294
def _resolve_imports(self, imports: list[str]) -> dict:
    """Map each import name to its pip package name.

    Names without an entry in ``IMPORT_TO_PACKAGE`` map to themselves.
    """
    resolved = {}
    for module_name in imports:
        resolved[module_name] = IMPORT_TO_PACKAGE.get(module_name, module_name)
    return {"resolved": resolved}
297
298    # ── Remote node helpers (SSH via asyncssh) ──────────────────────────────
299
300    def _ssh_kwargs(self, payload: dict) -> dict:
301        """Build asyncssh connection kwargs from a task payload."""
302        kwargs = dict(
303            host        = payload["host"],
304            username    = payload.get("user", "pi"),
305            known_hosts = None,   # disable host key checking for LAN deploys
306        )
307        if payload.get("password"):
308            kwargs["password"] = payload["password"]
309        if payload.get("key_path"):
310            kwargs["client_keys"] = [payload["key_path"]]
311        return kwargs
312
313    async def _ssh_run(self, conn, command: str) -> tuple[bool, str]:
314        """Run a single command over an open SSH connection. Returns (ok, output)."""
315        result = await conn.run(command, check=False)
316        output = (result.stdout or "") + (result.stderr or "")
317        return result.exit_status == 0, output.strip()
318
319    def _log_remote(self, message: str):
320        logger.info(f"[{self.name}] {message}")
321        asyncio.create_task(self._mqtt_publish(
322            f"agents/{self.actor_id}/logs",
323            {"type": "log", "message": message, "timestamp": time.time()},
324        ))
325
326    async def _node_install(self, payload: dict) -> dict:
327        """
328        Install pip packages on a remote node via SSH.
329
330        payload keys:
331          host      — IP or hostname of the remote machine
332          user      — SSH username (default: "pi")
333          packages  — list of package names to install
334          password  — SSH password (optional, prefer key auth)
335          key_path  — path to SSH private key (optional)
336        """
337        try:
338            import asyncssh
339        except ImportError:
340            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
341
342        host     = payload.get("host")
343        packages = payload.get("packages", [])
344        if isinstance(packages, str):
345            packages = [p.strip() for p in packages.replace(",", " ").split()]
346        if not host:
347            return {"error": "Missing 'host' in payload"}
348        if not packages:
349            return {"error": "No packages specified"}
350
351        pkg_str = " ".join(packages)
352        self._log_remote(f"Installing {pkg_str} on {host}...")
353
354        try:
355            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
356                ok, output = await self._ssh_run(
357                    conn,
358                    f"pip install {pkg_str} --break-system-packages -q 2>&1"
359                )
360                if ok:
361                    self._log_remote(f"✓ {pkg_str} installed on {host}")
362                    return {"success": True, "host": host, "packages": packages, "output": output[-300:]}
363                else:
364                    self._log_remote(f"✗ Install failed on {host}: {output[-200:]}")
365                    return {"success": False, "host": host, "error": output[-400:]}
366
367        except Exception as e:
368            return {"success": False, "host": host, "error": str(e)}
369
370    async def _node_deploy(self, payload: dict) -> dict:
371        """
372        Full bootstrap of a new Wactorz edge node via SSH.
373
374        Steps:
375          1. Create ~/wactorz/ directory
376          2. Upload remote_runner.py
377          3. Install aiomqtt (the only runtime dependency)
378          4. Kill any existing runner with the same node name
379          5. Start the runner in the background
380          6. Verify it appears online within 15 seconds
381
382        payload keys:
383          host       — IP or hostname
384          user       — SSH username (default: "pi")
385          node_name  — name this node will use (default: "remote-node")
386          broker     — MQTT broker host reachable FROM the Pi (default: "localhost")
387          password   — SSH password (optional)
388          key_path   — path to SSH private key (optional)
389          port       — MQTT broker port (default: 1883)
390        """
391        try:
392            import asyncssh
393        except ImportError:
394            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
395
396        host      = payload.get("host")
397        user      = payload.get("user", "pi")
398        node_name = payload.get("node_name", "remote-node")
399        broker    = payload.get("broker", "localhost")
400        mqtt_port = payload.get("port", 1883)
401
402        if not host:
403            return {"error": "Missing 'host' in payload"}
404
405        # Find remote_runner.py relative to this file
406        import pathlib
407        candidates = [
408            pathlib.Path(__file__).parent.parent / "remote_runner.py",
409            pathlib.Path("remote_runner.py"),
410            pathlib.Path(__file__).parent.parent.parent / "remote_runner.py",
411        ]
412        runner_path = next((p for p in candidates if p.exists()), None)
413        if not runner_path:
414            return {"error": "remote_runner.py not found. Make sure it is in the wactorz root."}
415
416        self._log_remote(f"Deploying node '{node_name}' to {user}@{host}...")
417
418        try:
419            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
420
421                # 1. Create directory
422                await self._ssh_run(conn, "mkdir -p ~/wactorz")
423                self._log_remote(f"[{node_name}] Directory created.")
424
425                # 2. Upload remote_runner.py
426                async with conn.start_sftp_client() as sftp:
427                    await sftp.put(str(runner_path), f"/home/{user}/wactorz/remote_runner.py")
428                self._log_remote(f"[{node_name}] remote_runner.py uploaded.")
429
430                # 3. Install the only required dependency
431                ok, out = await self._ssh_run(
432                    conn, "pip install aiomqtt --break-system-packages -q 2>&1"
433                )
434                if not ok:
435                    self._log_remote(f"[{node_name}] pip install warning: {out[:150]}")
436                else:
437                    self._log_remote(f"[{node_name}] aiomqtt installed.")
438
439                # 4. Kill any existing instance with this node name
440                await self._ssh_run(
441                    conn,
442                    f"pkill -f 'remote_runner.py.*--name {node_name}' 2>/dev/null; true"
443                )
444
445                # 5. Start runner in the background
446                cmd = (
447                    f"nohup python3 ~/wactorz/remote_runner.py "
448                    f"--broker {broker} --port {mqtt_port} --name {node_name} "
449                    f"> ~/wactorz/{node_name}.log 2>&1 &"
450                )
451                await self._ssh_run(conn, cmd)
452                self._log_remote(f"[{node_name}] Runner started.")
453
454            self._log_remote(
455                f"[{node_name}] Deploy complete! Node will appear in /nodes within 15s."
456            )
457            return {
458                "success":   True,
459                "node_name": node_name,
460                "host":      host,
461                "broker":    broker,
462                "message":   (
463                    f"Node '{node_name}' deployed to {user}@{host}. "
464                    f"It will appear in /nodes within ~15 seconds."
465                ),
466            }
467
468        except Exception as e:
469            msg = f"Deploy failed for '{node_name}' on {host}: {e}"
470            self._log_remote(msg)
471            return {"success": False, "node_name": node_name, "host": host, "error": str(e)}
472
473    async def _node_run(self, payload: dict) -> dict:
474        """
475        Run an arbitrary shell command on a remote node via SSH.
476
477        payload keys:
478          host     — IP or hostname
479          user     — SSH username (default: "pi")
480          command  — shell command to run
481          password / key_path — auth (optional)
482        """
483        try:
484            import asyncssh
485        except ImportError:
486            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
487
488        host    = payload.get("host")
489        command = payload.get("command", "echo hello")
490        if not host:
491            return {"error": "Missing 'host' in payload"}
492
493        self._log_remote(f"Running on {host}: {command[:80]}")
494        try:
495            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
496                ok, output = await self._ssh_run(conn, command)
497                return {
498                    "success":   ok,
499                    "host":      host,
500                    "command":   command,
501                    "output":    output,
502                    "exit_code": 0 if ok else 1,
503                }
504        except Exception as e:
505            return {"success": False, "host": host, "error": str(e)}

Pre-defined agent that installs Python packages on demand. Uses sys.executable so packages are installed into the active venv.

InstallerAgent(**kwargs)
80    def __init__(self, **kwargs):
81        kwargs.setdefault("name", "installer")
82        super().__init__(**kwargs)
83        self.protected    = True
84        self._install_log: list[dict] = []
protected
async def on_start(self):
89    async def on_start(self):
90        logger.info(f"[{self.name}] Installer ready — using: {sys.executable}")
91        await self._mqtt_publish(
92            f"agents/{self.actor_id}/logs",
93            {"type": "log", "message": f"Installer ready ({sys.executable})", "timestamp": time.time()},
94        )
95        await self.publish_manifest(
96            description="Installs Python packages on demand via pip",
97            capabilities=["pip_install", "package_management"],
98        )

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
async def handle_message(self, msg: Message):
    """Process a TASK message and send its result back to the requester.

    Messages of any other type are ignored. The result goes to
    ``msg.reply_to`` when set, otherwise to ``msg.sender_id``.
    """
    if msg.type != MessageType.TASK:
        return

    reply = await self._handle_install(msg)

    # Echo the task id back so the caller's pending future can resolve.
    request = msg.payload
    if isinstance(request, dict):
        correlation_id = request.get("task") or request.get("_task_id")
        if correlation_id:
            reply.update(task=correlation_id, _task_id=correlation_id)

    recipient = msg.reply_to or msg.sender_id
    if recipient:
        await self.send(recipient, MessageType.RESULT, reply)

Handle messages not caught by default handlers.

class CatalogAgent(wactorz.Actor):
139class CatalogAgent(Actor):
140    """
141    Pre-built agent recipe library.
142    Spawns any catalog agent on request by delegating to main's spawn pipeline.
143    """
144
def __init__(self, **kwargs):
    """Create the catalog actor (default name ``"catalog"``) and load its recipes."""
    if "name" not in kwargs:
        kwargs["name"] = "catalog"
    super().__init__(**kwargs)
    self.protected = True
    # Recipe table keyed by agent name, as produced by _build_catalog().
    self._catalog = _build_catalog()
150
151    # ── Lifecycle ──────────────────────────────────────────────────────────────
152
async def on_start(self):
    """Announce the catalog, publish its manifest, and register recipes with main.

    After publishing its own manifest, the agent waits (up to 20 polls,
    0.5 s apart) for an actor named "main" that has ``_agent_manifests``.
    It then writes one manifest per recipe straight into that dict. If main
    never becomes ready, a warning is logged for every recipe instead.
    """
    names = [*self._catalog.keys()]
    joined_names = ", ".join(names)

    logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
    await self._mqtt_publish(
        f"agents/{self.actor_id}/logs",
        {
            "type": "log",
            "message": f"Catalog ready: {joined_names}",
            "timestamp": time.time(),
        },
    )

    # One manifest describing the catalog agent itself.
    own_description = (
        "Pre-built agent recipe library. "
        "Spawns ready-made agents by name without requiring code. "
        f"Available: {joined_names}"
    )
    await self.publish_manifest(
        description=own_description,
        capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
        input_schema={"action": "str — 'spawn' | 'list' | 'info'",
                      "agent":  "str — agent name for spawn/info actions"},
        output_schema={"ok": "bool", "message": "str",
                       "agents": "list", "recipe": "dict"},
    )

    # Catalog and main start concurrently, so poll until main is usable.
    main = None
    polls_left = 20
    while polls_left:
        polls_left -= 1
        main = self._registry.find_by_name("main") if self._registry else None
        if main and hasattr(main, "_agent_manifests"):
            break
        await asyncio.sleep(0.5)

    main_ready = bool(main and hasattr(main, "_agent_manifests"))

    for recipe_name, recipe in self._catalog.items():
        manifest = {
            "name":          recipe_name,
            "actor_id":      f"catalog.{recipe_name}",
            "description":   recipe.get("description", ""),
            "capabilities":  recipe.get("capabilities", []),
            "input_schema":  recipe.get("input_schema",  {}),
            "output_schema": recipe.get("output_schema", {}),
            "publishes":     [],
            "spawnable":     True,
            "catalog":       self.name,
            "timestamp":     time.time(),
        }

        if not main_ready:
            logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{recipe_name}'")
            continue

        main._agent_manifests[recipe_name] = manifest
        logger.info(f"[{self.name}] Injected manifest for '{recipe_name}' into main")
208
209    def _current_task_description(self) -> str:
210        return f"catalog ({len(self._catalog)} recipes)"
211
212    # ── Message handling ───────────────────────────────────────────────────────
213
async def handle_message(self, msg: Message):
    """Handle a TASK message and send the result back to the requester.

    Other message types are ignored. A ``None`` payload is treated as an
    empty dict. The reply goes to ``msg.reply_to`` when set, otherwise to
    ``msg.sender_id``.
    """
    if msg.type == MessageType.TASK:
        request = {} if msg.payload is None else msg.payload
        reply = await self._handle(request)

        # Echo the task id so the caller's pending future can resolve.
        correlation_id = None
        if isinstance(request, dict):
            correlation_id = request.get("task") or request.get("_task_id")
        if correlation_id:
            reply["task"] = correlation_id
            reply["_task_id"] = correlation_id

        recipient = msg.reply_to or msg.sender_id
        if recipient:
            await self.send(recipient, MessageType.RESULT, reply)
230
231    async def _handle(self, payload) -> dict:
232        # Normalise to text first, then parse.
233        # Payloads arrive in three forms:
234        #   "spawn doc-to-pptx-agent"           ← raw string
235        #   {"text": "spawn doc-to-pptx-agent"} ← delegate_task() wrapping
236        #   {"action": "spawn", "agent": "..."}  ← structured dict
237
238        # ── Structured dict with explicit action key ───────────────────────
239        if isinstance(payload, dict) and payload.get("action"):
240            action = payload["action"].lower().strip()
241            if action == "list":
242                return self._action_list()
243            if action == "info":
244                return self._action_info(payload.get("agent", ""))
245            if action == "spawn":
246                return await self._action_spawn(payload.get("agent", ""), payload)
247            return {"ok": False, "message": f"Unknown action '{action}'. Use: spawn | list | info"}
248
249        # ── Convenience dict shortcuts ─────────────────────────────────────
250        if isinstance(payload, dict) and "spawn" in payload and isinstance(payload["spawn"], str):
251            return await self._action_spawn(payload["spawn"], payload)
252
253        # ── Extract text from any remaining form ───────────────────────────
254        if isinstance(payload, str):
255            text = payload.strip()
256        elif isinstance(payload, dict):
257            text = (payload.get("text") or payload.get("message") or payload.get("query") or "").strip()
258        else:
259            text = ""
260
261        # ── Parse "verb agent-name" ────────────────────────────────────────
262        if text:
263            parts = text.split(None, 1)
264            cmd   = parts[0].lower()
265            arg   = parts[1].strip() if len(parts) > 1 else ""
266            if cmd == "list":
267                return self._action_list()
268            if cmd == "info":
269                return self._action_info(arg)
270            if cmd == "spawn":
271                return await self._action_spawn(arg, {})
272            # Bare agent name with no verb → treat as spawn
273            if cmd in self._catalog:
274                return await self._action_spawn(cmd, {})
275
276        # ── Nothing parseable → helpful default ───────────────────────────
277        return self._action_list()
278
279    # ── Actions ────────────────────────────────────────────────────────────────
280
281    def _action_list(self) -> dict:
282        agents = []
283        for name, recipe in self._catalog.items():
284            agents.append({
285                "name":         name,
286                "description":  recipe.get("description", ""),
287                "capabilities": recipe.get("capabilities", []),
288            })
289        return {
290            "ok":      True,
291            "message": f"{len(agents)} agent(s) available in catalog",
292            "agents":  agents,
293        }
294
295    def _action_info(self, name: str) -> dict:
296        if not name:
297            return {"ok": False, "message": "Provide 'agent' name for info action"}
298        recipe = self._catalog.get(name)
299        if not recipe:
300            available = list(self._catalog.keys())
301            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}
302        # Return recipe without the full code string (too large for a response)
303        safe = {k: v for k, v in recipe.items() if k != "code"}
304        return {"ok": True, "message": f"Recipe for '{name}'", "recipe": safe}
305
306    async def _action_spawn(self, name: str, payload: dict) -> dict:
307        if not name:
308            return {"ok": False, "message": "Provide 'agent' name to spawn"}
309
310        recipe = self._catalog.get(name)
311        if not recipe:
312            available = list(self._catalog.keys())
313            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}
314
315        if not self._registry:
316            return {"ok": False, "message": "No registry available — cannot spawn"}
317
318        # If already running, return success immediately
319        existing = self._registry.find_by_name(name)
320        if existing:
321            return {"ok": True, "message": f"'{name}' is already running"}
322
323        logger.info(f"[{self.name}] Spawning '{name}'...")
324        await self._mqtt_publish(
325            f"agents/{self.actor_id}/logs",
326            {"type": "log", "message": f"Spawning '{name}'...", "timestamp": time.time()},
327        )
328
329        try:
330            from .dynamic_agent import DynamicAgent
331
332            # ── Auto-install Python dependencies ───────────────────────────
333            install = recipe.get("install", [])
334            if install:
335                installer = self._registry.find_by_name("installer") if self._registry else None
336                if installer:
337                    await agent.log(f"Installing deps for '{name}': {install}") if False else None
338                    logger.info(f"[{self.name}] Installing deps for '{name}': {install}")
339                    import uuid as _uuid
340                    task_id = f"cat_install_{_uuid.uuid4().hex[:8]}"
341                    future  = asyncio.get_event_loop().create_future()
342                    installer._result_futures = getattr(installer, "_result_futures", {})
343                    # Use main's result futures since installer replies there
344                    main = self._registry.find_by_name("main") if self._registry else None
345                    if main:
346                        main._result_futures[task_id] = future
347                    await self.send(installer.actor_id, MessageType.TASK, {
348                        "action":   "install",
349                        "packages": install,
350                        "task":     task_id,
351                        "_task_id": task_id,
352                    })
353                    try:
354                        await asyncio.wait_for(future, timeout=120.0)
355                    except asyncio.TimeoutError:
356                        logger.warning(f"[{self.name}] Install timeout for '{name}' — proceeding anyway")
357                else:
358                    logger.warning(f"[{self.name}] installer agent not found — skipping dep install for '{name}'")
359
360            # Find main to get its llm_provider and persistence_dir
361            main = self._registry.find_by_name("main")
362            llm_provider    = getattr(main, "llm", None) if main else None
363            persistence_dir = str(getattr(main, "_persistence_dir", "./state/main").parent) if main else "./state"
364
365            actor = await self.spawn(
366                DynamicAgent,
367                name            = name,
368                code            = recipe["code"],
369                poll_interval   = float(recipe.get("poll_interval", 3600)),
370                description     = recipe.get("description", ""),
371                input_schema    = recipe.get("input_schema", {}),
372                output_schema   = recipe.get("output_schema", {}),
373                llm_provider    = llm_provider,
374                persistence_dir = persistence_dir,
375            )
376
377            if actor:
378                # Save to main's spawn registry so it survives restarts
379                if main and hasattr(main, "_save_to_spawn_registry"):
380                    main._save_to_spawn_registry(recipe)
381
382                msg = f"'{name}' spawned and running"
383                logger.info(f"[{self.name}] {msg}")
384                await self._mqtt_publish(
385                    f"agents/{self.actor_id}/logs",
386                    {"type": "log", "message": msg, "timestamp": time.time()},
387                )
388                return {"ok": True, "message": msg, "agent": name}
389            else:
390                return {"ok": False, "message": f"Spawn returned no actor for '{name}'"}
391
392        except Exception as e:
393            msg = f"Failed to spawn '{name}': {e}"
394            logger.error(f"[{self.name}] {msg}")
395            return {"ok": False, "message": msg}
396
397    # ── Public API for other agents ────────────────────────────────────────────
398
def list_recipes(self) -> list[str]:
    """Return the names of every recipe in the catalog."""
    recipe_names = [*self._catalog.keys()]
    return recipe_names
402
def get_recipe(self, name: str) -> Optional[dict]:
    """Look up ``name`` and return its full recipe (code included), or None."""
    recipe = self._catalog.get(name)
    return recipe

Pre-built agent recipe library. Spawns any catalog agent on request by delegating to main's spawn pipeline.

CatalogAgent(**kwargs)
def __init__(self, **kwargs):
    """Create the catalog actor (default name ``"catalog"``) and load its recipes."""
    if "name" not in kwargs:
        kwargs["name"] = "catalog"
    super().__init__(**kwargs)
    self.protected = True
    # Recipe table keyed by agent name, as produced by _build_catalog().
    self._catalog = _build_catalog()
protected
async def on_start(self):
153    async def on_start(self):
154        names = list(self._catalog.keys())
155        logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
156        await self._mqtt_publish(
157            f"agents/{self.actor_id}/logs",
158            {"type": "log",
159             "message": f"Catalog ready: {', '.join(names)}",
160             "timestamp": time.time()},
161        )
162
163        # Publish one manifest for the catalog agent itself
164        await self.publish_manifest(
165            description=(
166                "Pre-built agent recipe library. "
167                "Spawns ready-made agents by name without requiring code. "
168                f"Available: {', '.join(names)}"
169            ),
170            capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
171            input_schema={"action": "str — 'spawn' | 'list' | 'info'",
172                          "agent":  "str — agent name for spawn/info actions"},
173            output_schema={"ok": "bool", "message": "str",
174                           "agents": "list", "recipe": "dict"},
175        )
176
177        # Inject recipe manifests directly into main's _agent_manifests dict.
178        # Retry briefly since catalog and main start concurrently.
179        import time as _t
180
181        # Wait for main to be ready (up to 10s)
182        main = None
183        for _ in range(20):
184            main = self._registry.find_by_name("main") if self._registry else None
185            if main and hasattr(main, "_agent_manifests"):
186                break
187            await asyncio.sleep(0.5)
188
189        for name, recipe in self._catalog.items():
190            manifest = {
191                "name":          name,
192                "actor_id":      f"catalog.{name}",
193                "description":   recipe.get("description", ""),
194                "capabilities":  recipe.get("capabilities", []),
195                "input_schema":  recipe.get("input_schema",  {}),
196                "output_schema": recipe.get("output_schema", {}),
197                "publishes":     [],
198                "spawnable":     True,
199                "catalog":       self.name,
200                "timestamp":     _t.time(),
201            }
202
203            if main and hasattr(main, "_agent_manifests"):
204                main._agent_manifests[name] = manifest
205                logger.info(f"[{self.name}] Injected manifest for '{name}' into main")
206            else:
207                logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{name}'")

Called when the actor starts. Override for initialization logic.

async def handle_message(self, msg: Message):
214    async def handle_message(self, msg: Message):
215        if msg.type != MessageType.TASK:
216            return
217
218        payload = msg.payload if msg.payload is not None else {}
219        result  = await self._handle(payload)
220
221        # Echo task_id so caller futures resolve
222        task_id = payload.get("task") or payload.get("_task_id") if isinstance(payload, dict) else None
223        if task_id:
224            result["task"]     = task_id
225            result["_task_id"] = task_id
226
227        target = msg.reply_to or msg.sender_id
228        if target:
229            await self.send(target, MessageType.RESULT, result)

Handle messages not caught by default handlers.

def list_recipes(self) -> list[str]:
399    def list_recipes(self) -> list[str]:
400        """Return names of all available recipes."""
401        return list(self._catalog.keys())

Return names of all available recipes.

def get_recipe(self, name: str) -> Optional[dict]:
403    def get_recipe(self, name: str) -> Optional[dict]:
404        """Return full recipe dict (including code) or None."""
405        return self._catalog.get(name)

Return full recipe dict (including code) or None.

def HomeAssistantHardwareAgent(*_hw_args, **_hw_kwargs):
def HomeAssistantHardwareAgent(*_hw_args, **_hw_kwargs):  # type: ignore[no-redef]
    """Deprecated factory that builds a ``HomeAssistantAgent``.

    Emits a ``DeprecationWarning`` attributed to the caller, defaults the
    ``name`` keyword to ``"home-assistant-agent"`` when it is not given, and
    forwards all arguments to ``HomeAssistantAgent``.

    Returns:
        The constructed ``HomeAssistantAgent`` instance.
    """
    deprecation_note = (
        "HomeAssistantHardwareAgent is deprecated and will be removed in a future release. "
        "Use HomeAssistantAgent instead."
    )
    # stacklevel=2 makes the warning point at the caller, not at this shim.
    _warnings_hw.warn(deprecation_note, DeprecationWarning, stacklevel=2)

    if "name" not in _hw_kwargs:
        _hw_kwargs["name"] = "home-assistant-agent"

    # Imported at call time rather than at module import.
    from .home_assistant_agent import HomeAssistantAgent as _agent_cls  # noqa: PLC0415
    return _agent_cls(*_hw_args, **_hw_kwargs)