wactorz

Wactorz - Actor-Model Multi-Agent Framework

 1"""Wactorz - Actor-Model Multi-Agent Framework"""
 2from ._version import __version__
 3from .core.actor import Actor, ActorState, Message, MessageType
 4from .core.registry import ActorSystem, ActorRegistry
 5__all__ = [
 6    "__version__",
 7    "Actor", "ActorState", "Message", "MessageType",
 8    "ActorSystem", "ActorRegistry",
 9]
10# Optional agents — only exported when their dependencies are available.
11try:
12    from .agents.llm_agent import LLMAgent, AnthropicProvider, OpenAIProvider, OllamaProvider, NIMProvider
13    __all__ += ["LLMAgent", "AnthropicProvider", "OpenAIProvider", "OllamaProvider", "NIMProvider"]
14except ImportError:
15    pass
16try:
17    from .agents.main_actor import MainActor
18    from .agents.monitor_agent import MonitorActor
19    from .agents.manual_agent import ManualAgent
20    from .agents.planner_agent import PlannerAgent
21    from .agents.dynamic_agent import DynamicAgent
22    from .agents.installer_agent import InstallerAgent
23    from .agents.catalog_agent import CatalogAgent
24    __all__ += ["MainActor", "MonitorActor", "CodeAgent", "ManualAgent", "PlannerAgent",
25                "DynamicAgent", "InstallerAgent", "CatalogAgent"]
26except ImportError:
27    pass
28#try:
29#    from .agents.ml_agent import MLAgent, YOLOAgent, AnomalyDetectorAgent
30#    __all__ += ["MLAgent", "YOLOAgent", "AnomalyDetectorAgent"]
31#except ImportError:
32#    pass
33try:
34    from .agents.home_assistant_hardware_agent import HomeAssistantHardwareAgent
35    __all__ += ["HomeAssistantHardwareAgent"]
36except ImportError:
37    pass
__version__ = '0.3.0'
class Actor(abc.ABC):
class Actor(ABC):
    """
    Base Actor class. All agents inherit from this.
    Actors are fully async and communicate only through messages.

    Every actor owns a bounded asyncio mailbox, three background tasks
    (message loop, heartbeat loop, MQTT command listener) and a small
    key/value persistence layer. Subclasses must implement
    ``handle_message`` and may override ``on_start``, ``on_stop`` and
    ``_current_task_description``.
    """

    def __init__(
        self,
        actor_id: Optional[str] = None,
        name: Optional[str] = None,
        persistence_dir: str = "./actor_state",
        mailbox_size: int = 1000,
    ):
        """
        Create the actor and its on-disk persistence folder.

        Args:
            actor_id: Explicit id. When omitted, a deterministic UUID5 is
                derived from ``name``; when ``name`` is omitted too, a
                random UUID4 is used.
            name: Human-readable name; defaults to ``actor-<first 8 id chars>``.
            persistence_dir: Root directory; the actor stores its state in a
                sub-folder named after the (slash-sanitised) actor name.
            mailbox_size: Maximum number of queued inbox messages.
        """
        if actor_id:
            self.actor_id = actor_id
        elif name:
            # Deterministic UUID from name — same name always gets same ID across restarts
            self.actor_id = str(uuid.uuid5(uuid.NAMESPACE_DNS, f"wactorz.actor.{name}"))
        else:
            self.actor_id = str(uuid.uuid4())
        self.name = name or f"actor-{self.actor_id[:8]}"
        self.state = ActorState.IDLE
        self.metrics = ActorMetrics()

        # Async mailbox (inbox)
        self._mailbox: asyncio.Queue = asyncio.Queue(maxsize=mailbox_size)
        self._outbox: dict[str, asyncio.Queue] = {}  # actor_id -> queue ref

        # Registry reference (set by ActorSystem)
        self._registry: Optional["ActorRegistry"] = None
        self._mqtt_client: Optional[Any] = None
        self._mqtt_broker: str = "localhost"
        self._mqtt_port: int = 1883

        # Persistence
        # Use name as persistence folder so it survives restarts with same name
        # Falls back to actor_id for anonymous actors
        safe_name = self.name.replace("/", "_").replace("\\", "_")
        self._persistence_dir = Path(persistence_dir) / safe_name
        self._persistence_dir.mkdir(parents=True, exist_ok=True)
        self._persistent_state: dict = {}

        # Unified persistence API — set by ActorSystem if available,
        # otherwise falls back to legacy pickle behavior
        self._persistence_api: Optional[Any] = None

        # Protection — if True, stop/delete/pause commands are ignored
        self.protected: bool = False

        # Supervisor reference — set by Supervisor when this actor is registered under it
        self.supervisor_id: Optional[str] = None

        # Handlers
        self._handlers: dict[MessageType, Callable] = {}
        self._setup_default_handlers()

        # Background tasks
        self._tasks: list[asyncio.Task] = []

        logger.info(f"[{self.name}] Actor created with id={self.actor_id}")

    # ─── Lifecycle ────────────────────────────────────────────────────────────

    async def start(self):
        """Start the actor's event loop.

        Loads persisted state, runs ``on_start()``, then launches the
        message, heartbeat and command-listener tasks and publishes the
        actor status.
        """
        self.state = ActorState.RUNNING
        self.metrics.start_time = time.time()
        await self._load_persistent_state()
        await self.on_start()
        self._tasks.append(asyncio.create_task(self._message_loop()))
        self._tasks.append(asyncio.create_task(self._heartbeat_loop()))
        self._tasks.append(asyncio.create_task(self._command_listener()))
        await self._publish_status()
        logger.info(f"[{self.name}] Actor started.")

    async def stop(self):
        """Gracefully stop the actor.

        Marks the actor STOPPED, cancels its background tasks, runs
        ``on_stop()``, saves state, publishes the final status and
        unregisters the actor from the TopicBus (best effort).
        """
        self.state = ActorState.STOPPED
        # stop() can be reached from inside one of our own background tasks
        # (a STOP message handled by the message loop, or a "stop"/"delete"
        # MQTT command). Cancelling that task here would raise CancelledError
        # at the next real suspension point below and abort the shutdown
        # half-way, so the running task is skipped: it leaves its loop by
        # itself once it observes the STOPPED state.
        current = asyncio.current_task()
        for task in self._tasks:
            if task is not current:
                task.cancel()
        await self.on_stop()                  # on_stop() calls persist() first
        await self._save_persistent_state()   # THEN save to disk
        await self._publish_status()
        # ── Unregister from TopicBus ───────────────────────────────────
        # Remove this agent's TopicContract so the planner doesn't wire
        # against topics from stopped/deleted/replaced agents.
        try:
            from .topic_bus import get_topic_bus
            bus = get_topic_bus()
            if bus:
                bus.unregister(self.name)
        except Exception as e:
            # TopicBus not initialised or unavailable — not fatal
            logger.debug(f"[{self.name}] TopicBus unregister skipped: {e}")
        logger.info(f"[{self.name}] Actor stopped.")

    async def pause(self):
        """Mark the actor PAUSED (the message loop idles) and publish status."""
        self.state = ActorState.PAUSED
        await self._publish_status()

    async def resume(self):
        """Mark the actor RUNNING again and publish status."""
        self.state = ActorState.RUNNING
        await self._publish_status()

    # ─── Message Loop ─────────────────────────────────────────────────────────

    async def _message_loop(self):
        """Main message processing loop.

        Runs until the actor is STOPPED or FAILED. While PAUSED it polls
        every 0.1 s without consuming the mailbox. Handler errors are
        counted in ``metrics.errors`` and logged; they do not end the loop.
        """
        # Only count meaningful messages — not heartbeats, status pings, lifecycle.
        # Built once: the set does not change between iterations.
        noise = {MessageType.HEARTBEAT, MessageType.STATUS_REQUEST,
                 MessageType.STATUS_RESPONSE, MessageType.STOP,
                 MessageType.PAUSE, MessageType.RESUME}
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                if self.state == ActorState.PAUSED:
                    await asyncio.sleep(0.1)
                    continue

                msg = await asyncio.wait_for(self._mailbox.get(), timeout=1.0)
                try:
                    if msg.type not in noise:
                        self.metrics.messages_processed += 1
                    await self._dispatch(msg)
                finally:
                    # Always balance the get(): otherwise a failing handler
                    # leaves the queue's unfinished count stuck and
                    # Queue.join() never returns.
                    self._mailbox.task_done()

            except asyncio.TimeoutError:
                continue
            except asyncio.CancelledError:
                break
            except Exception as e:
                self.metrics.errors += 1
                logger.error(f"[{self.name}] Error in message loop: {e}", exc_info=True)

    async def _dispatch(self, msg: Message):
        """Dispatch message to the appropriate handler.

        Falls back to ``handle_message`` when no handler is registered
        for the message type.
        """
        handler = self._handlers.get(msg.type)
        if handler:
            await handler(msg)
        else:
            await self.handle_message(msg)

    def _setup_default_handlers(self):
        """Install the built-in lifecycle/status handlers."""
        self._handlers = {
            MessageType.STOP: self._handle_stop,
            MessageType.PAUSE: self._handle_pause,
            MessageType.RESUME: self._handle_resume,
            MessageType.STATUS_REQUEST: self._handle_status_request,
            MessageType.HEARTBEAT: self._handle_heartbeat_msg,
        }

    async def _handle_stop(self, msg: Message):
        """Handle a STOP message by stopping the actor."""
        await self.stop()

    async def _handle_pause(self, msg: Message):
        """Handle a PAUSE message by pausing the actor."""
        await self.pause()

    async def _handle_resume(self, msg: Message):
        """Handle a RESUME message by resuming the actor."""
        await self.resume()

    async def _handle_status_request(self, msg: Message):
        """Answer a STATUS_REQUEST with a STATUS_RESPONSE carrying ``get_status()``."""
        status = self.get_status()
        # reply_to, when set, overrides sender_id as the reply target
        target = msg.reply_to or msg.sender_id
        if target:
            await self.send(target, MessageType.STATUS_RESPONSE, status)

    async def _handle_heartbeat_msg(self, msg: Message):
        """Ignore heartbeat messages."""
        pass  # Monitor actor handles these

    # ─── Heartbeat ────────────────────────────────────────────────────────────

    async def _heartbeat_loop(self, interval: float = 10.0):
        """Periodically publish heartbeat via MQTT.

        Args:
            interval: Seconds between heartbeat/metrics publications.
        """
        # Publish (almost) immediately on start so monitor sees agent right away
        await asyncio.sleep(0.5)
        await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", self._build_heartbeat())
        await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                await asyncio.sleep(interval)
                hb = self._build_heartbeat()
                self.metrics.last_heartbeat = time.time()
                await self._mqtt_publish(f"agents/{self.actor_id}/heartbeat", hb)
                await self._mqtt_publish(f"agents/{self.actor_id}/metrics", self._build_metrics())
            except asyncio.CancelledError:
                break
            except Exception as e:
                logger.warning(f"[{self.name}] Heartbeat error: {e}")

    def _build_heartbeat(self) -> dict:
        """Build the heartbeat payload (identity, state, process CPU/memory, task)."""
        proc = psutil.Process()
        # NOTE(review): cpu_percent(interval=0.1) blocks the calling thread
        # for the interval, and this is called from coroutines — confirm the
        # 0.1 s stall per heartbeat is acceptable.
        return {
            "actor_id":  self.actor_id,
            "name":      self.name,
            "timestamp": time.time(),
            "state":     self.state.value,
            "cpu":       proc.cpu_percent(interval=0.1),
            "memory_mb": proc.memory_info().rss / 1024 / 1024,
            "task":      self._current_task_description(),
            "protected": self.protected,
        }

    def _build_metrics(self) -> dict:
        """Build the metrics payload from ``self.metrics``."""
        return {
            "actor_id": self.actor_id,
            "messages_processed": self.metrics.messages_processed,
            "errors": self.metrics.errors,
            "uptime": self.metrics.uptime,
            "tasks_completed": self.metrics.tasks_completed,
            "tasks_failed": self.metrics.tasks_failed,
            "restart_count": self.metrics.restart_count,
        }

    async def _command_listener(self):
        """Listen for commands published to agents/{id}/commands via MQTT.

        Understands ``stop``, ``pause``, ``resume`` and ``delete``.
        Protected actors ignore ``stop``/``pause``/``delete``. Returns
        immediately when ``aiomqtt`` is not installed; on connection errors
        it retries after 5 seconds.
        """
        try:
            import aiomqtt
        except ImportError:
            return

        topic = f"agents/{self.actor_id}/commands"
        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
            try:
                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
                    await client.subscribe(topic)
                    logger.debug(f"[{self.name}] Subscribed to {topic}")
                    async for message in client.messages:
                        try:
                            data    = json.loads(message.payload.decode())
                            command = data.get("command", "")
                            logger.info(f"[{self.name}] Received command: {command}")
                            if self.protected and command in ("stop", "pause", "delete"):
                                logger.warning(f"[{self.name}] Ignoring '{command}' — actor is protected.")
                                continue
                            if command == "stop":
                                await self.stop()
                                return
                            elif command == "pause":
                                await self.pause()
                            elif command == "resume":
                                await self.resume()
                            elif command == "delete":
                                # If main actor knows about this agent, remove from spawn registry
                                if self._registry:
                                    main = self._registry.find_by_name("main")
                                    if main and hasattr(main, "_remove_from_spawn_registry"):
                                        main._remove_from_spawn_registry(self.name)
                                    await self._registry.unregister(self.actor_id)
                                await self.stop()
                                return
                        except Exception as e:
                            logger.error(f"[{self.name}] Command parse error: {e}")
            except asyncio.CancelledError:
                break
            except Exception as e:
                if self.state not in (ActorState.STOPPED, ActorState.FAILED):
                    # Surface the reason for the reconnect instead of retrying silently.
                    logger.debug(f"[{self.name}] Command listener error, retrying in 5s: {e}")
                    await asyncio.sleep(5)

    def _current_task_description(self) -> str:
        """Return a short description of the current task (``"idle"`` by default)."""
        return "idle"  # Override in subclasses

    # ─── Messaging ────────────────────────────────────────────────────────────

    async def send(self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
        """Send a message to another actor.

        Returns:
            ``False`` when no registry is attached, otherwise whatever the
            registry's ``deliver`` returns.
        """
        if self._registry is None:
            logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
            return False
        msg = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
        return await self._registry.deliver(target_id, msg)

    async def broadcast(self, msg_type: MessageType, payload: Any = None):
        """Broadcast to all registered actors (no-op without a registry)."""
        if self._registry:
            await self._registry.broadcast(self.actor_id, msg_type, payload)

    async def receive(self, msg: Message):
        """External entry point - put message in mailbox."""
        await self._mailbox.put(msg)

    # ─── Actor Spawning ───────────────────────────────────────────────────────

    async def spawn(self, actor_class: type, **kwargs) -> "Actor":
        """
        Spawn a child actor. The child inherits:
        - MQTT client (so it can publish heartbeats/status)
        - Registry (so it can send/receive messages)
        - Persistence dir defaults to same root
        - Persistence API (SQLite/Redis/Pickle routing)

        Args:
            actor_class: Class to instantiate for the child.
            **kwargs: Passed to ``actor_class``; ``persistence_dir`` defaults
                to the parent's persistence root.

        Returns:
            The started child actor.
        """
        # Default persistence to same root as parent
        kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))

        child = actor_class(**kwargs)

        # Inherit everything from parent
        child._mqtt_client  = self._mqtt_client   # MQTT publish connection
        child._mqtt_broker  = self._mqtt_broker   # broker address for command listener
        child._mqtt_port    = self._mqtt_port     # broker port
        child._registry     = self._registry      # message routing

        # Inherit persistence API if available
        if self._persistence_api is not None:
            try:
                from .persistence import PersistenceAPI, get_db, get_redis, get_pickle_store
                db = get_db()
                redis = get_redis()
                pkl = get_pickle_store()
                if db and redis and pkl:
                    child._persistence_api = PersistenceAPI(db, redis, pkl, child.name)
            except ImportError:
                pass

        # Register in registry
        if self._registry:
            await self._registry.register(child)

        # Start the child
        await child.start()

        # Immediately announce to monitor - don't wait for heartbeat loop
        await child._publish_status()
        await child._mqtt_publish(
            f"agents/{child.actor_id}/heartbeat",
            child._build_heartbeat(),
        )
        await child._mqtt_publish(
            f"agents/{child.actor_id}/metrics",
            child._build_metrics(),
        )

        # Notify parent's topic that it spawned a child
        await self._mqtt_publish(
            f"agents/{self.actor_id}/spawned",
            {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
        )
        logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
        return child

    # ─── Persistence ──────────────────────────────────────────────────────────

    async def _save_persistent_state(self):
        """Save state to disk. Called on stop() after on_stop()."""
        if self._persistence_api is not None:
            # New path: state is written per-key via persist(), nothing to batch-save.
            return
        # Legacy pickle path
        path = self._persistence_dir / "state.pkl"
        try:
            with open(path, "wb") as f:
                pickle.dump(self._persistent_state, f)
        except Exception as e:
            logger.error(f"[{self.name}] Failed to save state: {e}")

    async def _load_persistent_state(self):
        """Load state from disk. Called on start() before on_start().

        ``state.pkl`` is loaded into the in-memory dict whether or not the
        persistence API is set; with the API set it only serves as the
        legacy fallback consulted by ``recall()``.
        """
        path = self._persistence_dir / "state.pkl"
        if not path.exists():
            return
        legacy = self._persistence_api is not None
        try:
            # NOTE: pickle.load can execute arbitrary code — the persistence
            # directory must only contain files written by this application.
            with open(path, "rb") as f:
                self._persistent_state = pickle.load(f)
        except Exception as e:
            what = "legacy state" if legacy else "state"
            logger.error(f"[{self.name}] Failed to load {what}: {e}")
            return
        if legacy:
            logger.info(f"[{self.name}] Loaded legacy persistent state (will migrate on first persist).")
        else:
            logger.info(f"[{self.name}] Loaded persistent state.")

    def persist(self, key: str, value: Any):
        """
        Persist a key-value pair. Routes to the correct backend:
          - Known structured keys → SQLite
          - Known ephemeral keys → Redis
          - Everything else → Pickle

        If the new PersistenceAPI is not available, falls back to legacy
        pickle behavior (writes entire dict to disk on every call).
        """
        if self._persistence_api is not None:
            self._persistence_api.set(key, value)
            return

        # Legacy pickle path — write to disk immediately
        self._persistent_state[key] = value
        path = self._persistence_dir / "state.pkl"
        try:
            with open(path, "wb") as f:
                pickle.dump(self._persistent_state, f)
        except Exception as e:
            logger.debug(f"[{self.name}] persist write failed: {e}")

    def recall(self, key: str, default: Any = None) -> Any:
        """
        Recall a persisted value. Routes to the correct backend.
        Returns default if the key doesn't exist.
        """
        if self._persistence_api is not None:
            # Check new store first, then fall back to legacy in-memory dict
            # (handles migration period where some keys are in pickle, some in new store)
            result = self._persistence_api.get(key)
            if result is not None:
                return result

        # Legacy in-memory state (loaded from .pkl), also the fallback above
        return self._persistent_state.get(key, default)

    # ─── MQTT ─────────────────────────────────────────────────────────────────

    async def _mqtt_publish(self, topic: str, payload: Any, retain: bool = False, qos: int = 0):
        """Publish ``payload`` to ``topic``; a no-op without an MQTT client.

        Bytes are sent unchanged, everything else is JSON-encoded. Publish
        errors are logged at debug level and never raised.
        """
        if self._mqtt_client:
            try:
                # Empty bytes = clear a retained message (MQTT spec)
                # Must send raw empty bytes, not JSON-encoded
                if payload == b"" or (payload is None and retain):
                    encoded = b""
                elif isinstance(payload, (bytes, bytearray)):
                    encoded = payload
                else:
                    encoded = json.dumps(payload)
                await self._mqtt_client.publish(topic, encoded, retain=retain, qos=qos)
            except Exception as e:
                logger.debug(f"[{self.name}] MQTT publish failed: {e}")

    async def _publish_status(self):
        """Publish ``get_status()`` to ``agents/{actor_id}/status``."""
        await self._mqtt_publish(f"agents/{self.actor_id}/status", self.get_status())

    # ─── Status ───────────────────────────────────────────────────────────────

    def get_status(self) -> dict:
        """Return a snapshot of identity, state and key metrics."""
        return {
            "actor_id": self.actor_id,
            "name": self.name,
            "state": self.state.value,
            "uptime": self.metrics.uptime,
            "messages_processed": self.metrics.messages_processed,
            "restart_count": self.metrics.restart_count,
            "supervised": self.supervisor_id is not None,
        }

    # ─── Abstract / Override ──────────────────────────────────────────────────

    async def on_start(self):
        """Called when actor starts. Override for init logic."""
        pass

    async def publish_manifest(self, description: str = "", publishes: Optional[list] = None,
                                capabilities: Optional[list] = None, input_schema: Optional[dict] = None,
                                output_schema: Optional[dict] = None):
        """
        Publish a capability manifest so main's topic registry can discover this actor.
        Call from on_start() in any actor that wants to be discoverable.
        Manifests are retained — main sees them immediately even after restart.

        input_schema / output_schema — dicts describing expected payload fields, e.g.:
            input_schema  = {"city": "str — city name to fetch weather for"}
            output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
        """
        manifest = {
            "name":          self.name,
            "actor_id":      self.actor_id,
            "description":   description,
            "publishes":     publishes or [],
            "capabilities":  capabilities or [],
            "input_schema":  input_schema or {},
            "output_schema": output_schema or {},
            "timestamp":     time.time(),
        }
        await self._mqtt_publish(f"agents/{self.actor_id}/manifest", manifest, retain=True)

    async def on_stop(self):
        """Called when actor stops. Override for cleanup."""
        pass

    @abstractmethod
    async def handle_message(self, msg: Message):
        """Handle messages not caught by default handlers."""
        pass

    def __repr__(self):
        return f"<Actor name={self.name} id={self.actor_id[:8]} state={self.state.value}>"

Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.

name
state
metrics
protected: bool
supervisor_id: Optional[str]
async def start(self):
166    async def start(self):
167        """Start the actor's event loop."""
168        self.state = ActorState.RUNNING
169        self.metrics.start_time = time.time()
170        await self._load_persistent_state()
171        await self.on_start()
172        self._tasks.append(asyncio.create_task(self._message_loop()))
173        self._tasks.append(asyncio.create_task(self._heartbeat_loop()))
174        self._tasks.append(asyncio.create_task(self._command_listener()))
175        await self._publish_status()
176        logger.info(f"[{self.name}] Actor started.")

Start the actor's event loop.

async def stop(self):
178    async def stop(self):
179        """Gracefully stop the actor."""
180        self.state = ActorState.STOPPED
181        for task in self._tasks:
182            task.cancel()
183        await self.on_stop()                  # on_stop() calls persist() first
184        await self._save_persistent_state()   # THEN save to disk
185        await self._publish_status()
186        # ── Unregister from TopicBus ───────────────────────────────────
187        # Remove this agent's TopicContract so the planner doesn't wire
188        # against topics from stopped/deleted/replaced agents.
189        try:
190            from .topic_bus import get_topic_bus
191            bus = get_topic_bus()
192            if bus:
193                bus.unregister(self.name)
194        except Exception:
195            pass  # TopicBus not initialised or unavailable — not fatal
196        logger.info(f"[{self.name}] Actor stopped.")

Gracefully stop the actor.

async def pause(self):
198    async def pause(self):
199        self.state = ActorState.PAUSED
200        await self._publish_status()
async def resume(self):
202    async def resume(self):
203        self.state = ActorState.RUNNING
204        await self._publish_status()
async def send( self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
364    async def send(self, target_id: str, msg_type: MessageType, payload: Any = None) -> bool:
365        """Send a message to another actor."""
366        if self._registry is None:
367            logger.warning(f"[{self.name}] No registry attached, cannot send messages.")
368            return False
369        msg = Message(type=msg_type, sender_id=self.actor_id, payload=payload)
370        return await self._registry.deliver(target_id, msg)

Send a message to another actor.

async def broadcast(self, msg_type: MessageType, payload: Any = None):
372    async def broadcast(self, msg_type: MessageType, payload: Any = None):
373        """Broadcast to all registered actors."""
374        if self._registry:
375            await self._registry.broadcast(self.actor_id, msg_type, payload)

Broadcast to all registered actors.

async def receive(self, msg: Message):
377    async def receive(self, msg: Message):
378        """External entry point - put message in mailbox."""
379        await self._mailbox.put(msg)

External entry point - put message in mailbox.

async def spawn(self, actor_class: type, **kwargs) -> Actor:
383    async def spawn(self, actor_class: type, **kwargs) -> "Actor":
384        """
385        Spawn a child actor. The child inherits:
386        - MQTT client (so it can publish heartbeats/status)
387        - Registry (so it can send/receive messages)
388        - Persistence dir defaults to same root
389        - Persistence API (SQLite/Redis/Pickle routing)
390        """
391        # Default persistence to same root as parent
392        kwargs.setdefault("persistence_dir", str(self._persistence_dir.parent))
393
394        child = actor_class(**kwargs)
395
396        # Inherit everything from parent
397        child._mqtt_client  = self._mqtt_client   # MQTT publish connection
398        child._mqtt_broker  = self._mqtt_broker   # broker address for command listener
399        child._mqtt_port    = self._mqtt_port     # broker port
400        child._registry     = self._registry      # message routing
401
402        # Inherit persistence API if available
403        if self._persistence_api is not None:
404            try:
405                from .persistence import PersistenceAPI, get_db, get_redis, get_pickle_store
406                db = get_db()
407                redis = get_redis()
408                pkl = get_pickle_store()
409                if db and redis and pkl:
410                    child._persistence_api = PersistenceAPI(db, redis, pkl, child.name)
411            except ImportError:
412                pass
413
414        # Register in registry
415        if self._registry:
416            await self._registry.register(child)
417
418        # Start the child
419        await child.start()
420
421        # Immediately announce to monitor - don't wait for heartbeat loop
422        await child._publish_status()
423        await child._mqtt_publish(
424            f"agents/{child.actor_id}/heartbeat",
425            child._build_heartbeat(),
426        )
427        await child._mqtt_publish(
428            f"agents/{child.actor_id}/metrics",
429            child._build_metrics(),
430        )
431
432        # Notify parent's topic that it spawned a child
433        await self._mqtt_publish(
434            f"agents/{self.actor_id}/spawned",
435            {"child_id": child.actor_id, "child_name": child.name, "timestamp": time.time()},
436        )
437        logger.info(f"[{self.name}] Spawned: {child.name} ({child.actor_id[:8]})")
438        return child

Spawn a child actor. The child inherits:

  • MQTT client (so it can publish heartbeats/status)
  • Registry (so it can send/receive messages)
  • Persistence dir defaults to same root
  • Persistence API (SQLite/Redis/Pickle routing)
def persist(self, key: str, value: Any):
480    def persist(self, key: str, value: Any):
481        """
482        Persist a key-value pair. Routes to the correct backend:
483          - Known structured keys → SQLite
484          - Known ephemeral keys → Redis
485          - Everything else → Pickle
486
487        If the new PersistenceAPI is not available, falls back to legacy
488        pickle behavior (writes entire dict to disk on every call).
489        """
490        if self._persistence_api is not None:
491            self._persistence_api.set(key, value)
492            return
493
494        # Legacy pickle path — write to disk immediately
495        self._persistent_state[key] = value
496        path = self._persistence_dir / "state.pkl"
497        try:
498            with open(path, "wb") as f:
499                pickle.dump(self._persistent_state, f)
500        except Exception as e:
501            logger.debug(f"[{self.name}] persist write failed: {e}")

Persist a key-value pair. Routes to the correct backend:

  • Known structured keys → SQLite
  • Known ephemeral keys → Redis
  • Everything else → Pickle

If the new PersistenceAPI is not available, falls back to legacy pickle behavior (writes entire dict to disk on every call).

def recall(self, key: str, default: Any = None) -> Any:
503    def recall(self, key: str, default: Any = None) -> Any:
504        """
505        Recall a persisted value. Routes to the correct backend.
506        Returns default if the key doesn't exist.
507        """
508        if self._persistence_api is not None:
509            # Check new store first, then fall back to legacy in-memory dict
510            # (handles migration period where some keys are in pickle, some in new store)
511            result = self._persistence_api.get(key)
512            if result is not None:
513                return result
514            # Fallback: check legacy in-memory state (loaded from old .pkl)
515            return self._persistent_state.get(key, default)
516
517        # Legacy pickle path
518        return self._persistent_state.get(key, default)

Recall a persisted value. Routes to the correct backend. Returns default if the key doesn't exist.

def get_status(self) -> dict:
542    def get_status(self) -> dict:
543        return {
544            "actor_id": self.actor_id,
545            "name": self.name,
546            "state": self.state.value,
547            "uptime": self.metrics.uptime,
548            "messages_processed": self.metrics.messages_processed,
549            "restart_count": self.metrics.restart_count,
550            "supervised": self.supervisor_id is not None,
551        }
async def on_start(self):
555    async def on_start(self):
556        """Called when actor starts. Override for init logic."""
557        pass

Called when actor starts. Override for init logic.

async def publish_manifest( self, description: str = '', publishes: list = None, capabilities: list = None, input_schema: dict = None, output_schema: dict = None):
559    async def publish_manifest(self, description: str = "", publishes: list = None,
560                                capabilities: list = None, input_schema: dict = None,
561                                output_schema: dict = None):
562        """
563        Publish a capability manifest so main's topic registry can discover this actor.
564        Call from on_start() in any actor that wants to be discoverable.
565        Manifests are retained — main sees them immediately even after restart.
566
567        input_schema / output_schema — dicts describing expected payload fields, e.g.:
568            input_schema  = {"city": "str — city name to fetch weather for"}
569            output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}
570        """
571        import time as _t
572        manifest = {
573            "name":          self.name,
574            "actor_id":      self.actor_id,
575            "description":   description,
576            "publishes":     publishes or [],
577            "capabilities":  capabilities or [],
578            "input_schema":  input_schema or {},
579            "output_schema": output_schema or {},
580            "timestamp":     _t.time(),
581        }
582        await self._mqtt_publish(f"agents/{self.actor_id}/manifest", manifest, retain=True)

Publish a capability manifest so main's topic registry can discover this actor. Call from on_start() in any actor that wants to be discoverable. Manifests are retained — main sees them immediately even after restart.

input_schema / output_schema — dicts describing expected payload fields, e.g. input_schema = {"city": "str — city name to fetch weather for"} and output_schema = {"temp_c": "float", "condition": "str", "humidity": "int"}.

async def on_stop(self):
584    async def on_stop(self):
585        """Called when actor stops. Override for cleanup."""
586        pass

Called when actor stops. Override for cleanup.

@abstractmethod
async def handle_message(self, msg: Message):
588    @abstractmethod
589    async def handle_message(self, msg: Message):
590        """Handle messages not caught by default handlers."""
591        pass

Handle messages not caught by default handlers.

class ActorState(builtins.str, enum.Enum):
class ActorState(str, Enum):
    """Lifecycle states of an actor.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value (e.g. ``ActorState.RUNNING == "running"``).
    """

    IDLE = "idle"
    RUNNING = "running"
    PAUSED = "paused"
    STOPPED = "stopped"
    FAILED = "failed"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

IDLE = <ActorState.IDLE: 'idle'>
RUNNING = <ActorState.RUNNING: 'running'>
PAUSED = <ActorState.PAUSED: 'paused'>
STOPPED = <ActorState.STOPPED: 'stopped'>
FAILED = <ActorState.FAILED: 'failed'>
@dataclass
class Message:
@dataclass
class Message:
    """Envelope passed between actors: a typed message with sender, optional payload and reply target."""

    type: MessageType
    sender_id: str
    payload: Any = None
    reply_to: Optional[str] = None
    # Fresh UUID4 string generated per instance.
    message_id: str = field(default_factory=lambda: str(uuid.uuid4()))
    # Creation time taken from time.time().
    timestamp: float = field(default_factory=time.time)

    def to_dict(self) -> dict:
        """Return the message as a plain dict, with ``type`` replaced by its enum value.

        The payload is included by reference, not copied.
        """
        plain = {"type": self.type.value}
        for attr in ("sender_id", "payload", "reply_to", "message_id", "timestamp"):
            plain[attr] = getattr(self, attr)
        return plain
Message( type: MessageType, sender_id: str, payload: Any = None, reply_to: Optional[str] = None, message_id: str = <factory>, timestamp: float = <factory>)
type: MessageType
sender_id: str
payload: Any = None
reply_to: Optional[str] = None
message_id: str
timestamp: float
def to_dict(self) -> dict:
77    def to_dict(self) -> dict:
78        return {
79            "type": self.type.value,
80            "sender_id": self.sender_id,
81            "payload": self.payload,
82            "reply_to": self.reply_to,
83            "message_id": self.message_id,
84            "timestamp": self.timestamp,
85        }
class MessageType(builtins.str, enum.Enum):
class MessageType(str, Enum):
    """Kinds of messages actors exchange.

    Members are also ``str`` instances, so each compares equal to its
    lowercase value (e.g. ``MessageType.TASK == "task"``).
    """

    # Lifecycle
    START = "start"
    STOP = "stop"
    PAUSE = "pause"
    RESUME = "resume"
    DELETE = "delete"

    # Communication
    TASK = "task"
    RESULT = "result"
    HEARTBEAT = "heartbeat"
    SPAWN = "spawn"

    # Internal
    TICK = "tick"
    STATUS_REQUEST = "status_request"
    STATUS_RESPONSE = "status_response"

str(object='') -> str str(bytes_or_buffer[, encoding[, errors]]) -> str

Create a new string object from the given object. If encoding or errors is specified, then the object must expose a data buffer that will be decoded using the given encoding and error handler. Otherwise, returns the result of object.__str__() (if defined) or repr(object). encoding defaults to 'utf-8'. errors defaults to 'strict'.

START = <MessageType.START: 'start'>
STOP = <MessageType.STOP: 'stop'>
PAUSE = <MessageType.PAUSE: 'pause'>
RESUME = <MessageType.RESUME: 'resume'>
DELETE = <MessageType.DELETE: 'delete'>
TASK = <MessageType.TASK: 'task'>
RESULT = <MessageType.RESULT: 'result'>
HEARTBEAT = <MessageType.HEARTBEAT: 'heartbeat'>
SPAWN = <MessageType.SPAWN: 'spawn'>
TICK = <MessageType.TICK: 'tick'>
STATUS_REQUEST = <MessageType.STATUS_REQUEST: 'status_request'>
STATUS_RESPONSE = <MessageType.STATUS_RESPONSE: 'status_response'>
class ActorSystem:
class ActorSystem:
    """Top-level orchestrator.

    Owns the ActorRegistry, the shared MQTT publisher and (lazily) the
    Supervisor, and starts/stops actors as a group.
    """

    def __init__(self, mqtt_broker: str = "localhost", mqtt_port: int = 1883,
                 state_dir: str = "./state"):
        """Store connection settings; no I/O happens until start() is called.

        Args:
            mqtt_broker: Broker host handed to the MQTT publisher and to actors.
            mqtt_port:   Broker port.
            state_dir:   Directory in which start() places the MQTT outbox database.
        """
        self.registry     = ActorRegistry()
        self._mqtt_broker = mqtt_broker
        self._mqtt_port   = mqtt_port
        self._mqtt_client = None
        self._running     = False
        self._supervisor: Optional[Supervisor] = None
        self._state_dir   = state_dir
        # Assigned by start(); defined up front so reading it before start()
        # yields None instead of raising AttributeError.
        self.topic_bus    = None

    def _inject(self, actor: Actor):
        """Inject MQTT client + broker/port into an actor so it can publish and subscribe."""
        actor._mqtt_client = self._mqtt_client
        actor._mqtt_broker = self._mqtt_broker
        actor._mqtt_port   = self._mqtt_port

    @property
    def supervisor(self) -> Supervisor:
        """Lazy-create the Supervisor bound to this system's registry and inject function."""
        if self._supervisor is None:
            self._supervisor = Supervisor(self.registry, self._inject)
        return self._supervisor

    def mqtt_status(self) -> dict:
        """Return current MQTT publisher health — useful for dashboard and /nodes.

        The same four keys are always present (connected, queue_depth,
        available, client_id), whether or not a client exists yet.
        """
        if self._mqtt_client is None:
            # Same shape as the connected branch so consumers can index any key.
            return {"connected": False, "queue_depth": 0, "available": False, "client_id": "?"}
        return {
            "connected":   getattr(self._mqtt_client, "connected", False),
            "queue_depth": getattr(self._mqtt_client, "queue_depth", 0),
            "available":   getattr(self._mqtt_client, "_available", False),
            "client_id":   getattr(self._mqtt_client, "_client_id", "?"),
        }

    async def start(self, *initial_actors: Actor):
        """Bring the system up: create the MQTT publisher and TopicBus, then start the given actors.

        Actors are injected, registered and started one at a time, in the order given.
        """
        self._running = True
        import os
        os.makedirs(self._state_dir, exist_ok=True)
        db_path = os.path.join(self._state_dir, "mqtt_outbox.db")
        self._mqtt_client = await _MQTTPublisher.create(
            self._mqtt_broker, self._mqtt_port, db_path=db_path
        )

        # ── Initialise TopicBus (reactive pub/sub coordination layer) ─────
        from .topic_bus import init_topic_bus
        self.topic_bus = init_topic_bus(
            mqtt_client  = self._mqtt_client,
            mqtt_broker  = self._mqtt_broker,
            mqtt_port    = self._mqtt_port,
        )
        logger.info("[ActorSystem] TopicBus initialised")

        for actor in initial_actors:
            self._inject(actor)
            await self.registry.register(actor)
            await actor.start()

        logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")

    async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
        """Spawn and register a new actor in the system.

        ``kwargs`` are passed to ``actor_class``; the started instance is returned.
        """
        actor = actor_class(**kwargs)
        self._inject(actor)
        await self.registry.register(actor)
        await actor.start()
        return actor

    async def stop_all(self):
        """Stop the supervisor (if created), every registered actor, then the MQTT client.

        Actors are stopped concurrently. One actor failing to stop does not
        prevent the others from stopping; each failure is logged.
        """
        self._running = False
        # Stop supervisor first so it doesn't try to restart actors we're about to stop
        if self._supervisor:
            await self._supervisor.stop()
        actors = self.registry.all_actors()
        outcomes = await asyncio.gather(*[a.stop() for a in actors], return_exceptions=True)
        # return_exceptions=True hands failures back as values; surface them
        # instead of discarding them silently.
        for actor, outcome in zip(actors, outcomes):
            if isinstance(outcome, BaseException):
                logger.warning(
                    f"[ActorSystem] {getattr(actor, 'name', '?')} failed to stop cleanly: {outcome!r}"
                )
        if self._mqtt_client:
            await self._mqtt_client.disconnect()
        logger.info("[ActorSystem] All actors stopped.")

    async def run_forever(self):
        """Idle in one-second sleeps while the system is running.

        On KeyboardInterrupt or task cancellation, logs the shutdown and calls
        stop_all(). Returns normally once ``_running`` is cleared.
        """
        try:
            while self._running:
                await asyncio.sleep(1)
        except (KeyboardInterrupt, asyncio.CancelledError):
            logger.info("[ActorSystem] Shutdown signal received.")
            await self.stop_all()

Top-level orchestrator.

ActorSystem( mqtt_broker: str = 'localhost', mqtt_port: int = 1883, state_dir: str = './state')
347    def __init__(self, mqtt_broker: str = "localhost", mqtt_port: int = 1883,
348                 state_dir: str = "./state"):
349        self.registry     = ActorRegistry()
350        self._mqtt_broker = mqtt_broker
351        self._mqtt_port   = mqtt_port
352        self._mqtt_client = None
353        self._running     = False
354        self._supervisor: Optional[Supervisor] = None
355        self._state_dir   = state_dir
registry
supervisor: wactorz.core.registry.Supervisor
363    @property
364    def supervisor(self) -> Supervisor:
365        """Lazy-create the Supervisor bound to this system's registry and inject function."""
366        if self._supervisor is None:
367            self._supervisor = Supervisor(self.registry, self._inject)
368        return self._supervisor

Lazy-create the Supervisor bound to this system's registry and inject function.

def mqtt_status(self) -> dict:
370    def mqtt_status(self) -> dict:
371        """Return current MQTT publisher health — useful for dashboard and /nodes."""
372        if self._mqtt_client is None:
373            return {"connected": False, "queue_depth": 0, "available": False}
374        return {
375            "connected":   getattr(self._mqtt_client, "connected", False),
376            "queue_depth": getattr(self._mqtt_client, "queue_depth", 0),
377            "available":   getattr(self._mqtt_client, "_available", False),
378            "client_id":   getattr(self._mqtt_client, "_client_id", "?"),
379        }

Return current MQTT publisher health — useful for dashboard and /nodes.

async def start(self, *initial_actors: Actor):
381    async def start(self, *initial_actors: Actor):
382        self._running = True
383        import os
384        os.makedirs(self._state_dir, exist_ok=True)
385        db_path = os.path.join(self._state_dir, "mqtt_outbox.db")
386        self._mqtt_client = await _MQTTPublisher.create(
387            self._mqtt_broker, self._mqtt_port, db_path=db_path
388        )
389
390        # ── Initialise TopicBus (reactive pub/sub coordination layer) ─────
391        from .topic_bus import init_topic_bus
392        self.topic_bus = init_topic_bus(
393            mqtt_client  = self._mqtt_client,
394            mqtt_broker  = self._mqtt_broker,
395            mqtt_port    = self._mqtt_port,
396        )
397        logger.info("[ActorSystem] TopicBus initialised")
398
399        for actor in initial_actors:
400            self._inject(actor)
401            await self.registry.register(actor)
402            await actor.start()
403
404        logger.info(f"[ActorSystem] Started with {len(initial_actors)} actors.")
async def spawn( self, actor_class: Type[Actor], **kwargs) -> Actor:
406    async def spawn(self, actor_class: Type[Actor], **kwargs) -> Actor:
407        """Spawn and register a new actor in the system."""
408        actor = actor_class(**kwargs)
409        self._inject(actor)
410        await self.registry.register(actor)
411        await actor.start()
412        return actor

Spawn and register a new actor in the system.

async def stop_all(self):
414    async def stop_all(self):
415        self._running = False
416        # Stop supervisor first so it doesn't try to restart actors we're about to stop
417        if self._supervisor:
418            await self._supervisor.stop()
419        actors = self.registry.all_actors()
420        await asyncio.gather(*[a.stop() for a in actors], return_exceptions=True)
421        if self._mqtt_client:
422            await self._mqtt_client.disconnect()
423        logger.info("[ActorSystem] All actors stopped.")
async def run_forever(self):
425    async def run_forever(self):
426        try:
427            while self._running:
428                await asyncio.sleep(1)
429        except (KeyboardInterrupt, asyncio.CancelledError):
430            logger.info("[ActorSystem] Shutdown signal received.")
431            await self.stop_all()
class ActorRegistry:
 61class ActorRegistry:
 62    """Maintains a map of all living actors and routes messages between them."""
 63
 64    def __init__(self):
 65        self._actors: dict[str, Actor] = {}
 66        self._lock = asyncio.Lock()
 67
 68    async def register(self, actor: Actor):
 69        async with self._lock:
 70            actor._registry = self
 71            self._actors[actor.actor_id] = actor
 72            logger.info(f"[Registry] Registered {actor.name} ({actor.actor_id[:8]})")
 73
 74    async def unregister(self, actor_id: str):
 75        async with self._lock:
 76            if actor_id in self._actors:
 77                del self._actors[actor_id]
 78                logger.info(f"[Registry] Unregistered {actor_id[:8]}")
 79
 80    async def deliver(self, target_id: str, msg: Message) -> bool:
 81        actor = self._actors.get(target_id)
 82        if actor is None:
 83            logger.warning(f"[Registry] Unknown target: {target_id[:8]}")
 84            return False
 85        await actor.receive(msg)
 86        return True
 87
 88    async def broadcast(self, sender_id: str, msg_type: MessageType, payload=None):
 89        msg = Message(type=msg_type, sender_id=sender_id, payload=payload)
 90        for actor_id, actor in list(self._actors.items()):
 91            if actor_id != sender_id:
 92                await actor.receive(msg)
 93
 94    def get(self, actor_id: str) -> Optional[Actor]:
 95        return self._actors.get(actor_id)
 96
 97    def all_actors(self) -> list[Actor]:
 98        return list(self._actors.values())
 99
100    def find_by_name(self, name: str) -> Optional[Actor]:
101        for actor in self._actors.values():
102            if actor.name == name:
103                return actor
104        return None
105
106    def __len__(self):
107        return len(self._actors)

Maintains a map of all living actors and routes messages between them.

async def register(self, actor: Actor):
68    async def register(self, actor: Actor):
69        async with self._lock:
70            actor._registry = self
71            self._actors[actor.actor_id] = actor
72            logger.info(f"[Registry] Registered {actor.name} ({actor.actor_id[:8]})")
async def unregister(self, actor_id: str):
74    async def unregister(self, actor_id: str):
75        async with self._lock:
76            if actor_id in self._actors:
77                del self._actors[actor_id]
78                logger.info(f"[Registry] Unregistered {actor_id[:8]}")
async def deliver(self, target_id: str, msg: Message) -> bool:
80    async def deliver(self, target_id: str, msg: Message) -> bool:
81        actor = self._actors.get(target_id)
82        if actor is None:
83            logger.warning(f"[Registry] Unknown target: {target_id[:8]}")
84            return False
85        await actor.receive(msg)
86        return True
async def broadcast( self, sender_id: str, msg_type: MessageType, payload=None):
88    async def broadcast(self, sender_id: str, msg_type: MessageType, payload=None):
89        msg = Message(type=msg_type, sender_id=sender_id, payload=payload)
90        for actor_id, actor in list(self._actors.items()):
91            if actor_id != sender_id:
92                await actor.receive(msg)
def get(self, actor_id: str) -> Optional[Actor]:
94    def get(self, actor_id: str) -> Optional[Actor]:
95        return self._actors.get(actor_id)
def all_actors(self) -> list[Actor]:
97    def all_actors(self) -> list[Actor]:
98        return list(self._actors.values())
def find_by_name(self, name: str) -> Optional[Actor]:
100    def find_by_name(self, name: str) -> Optional[Actor]:
101        for actor in self._actors.values():
102            if actor.name == name:
103                return actor
104        return None
class LLMAgent(wactorz.Actor):
class LLMAgent(Actor):
    """
    An Actor that uses an LLM to process tasks.
    Maintains conversation history and supports tool use.
    """
    # NOTE: on_start() publishes the first line of this docstring as the
    # manifest description when no DESCRIPTION attribute is set, so rewording
    # that line changes what is published.

    def __init__(
        self,
        llm_provider: Optional[LLMProvider] = None,
        system_prompt: str = "You are a helpful AI agent.",
        max_history: int = 20,
        summarize_threshold: int = 30,
        **kwargs,
    ):
        """
        Args:
            llm_provider: Provider used for completions; with None the agent
                logs a warning on tasks and chat() returns a placeholder.
            system_prompt: System prompt sent with every completion.
            max_history: Number of most recent messages sent to the LLM and
                kept when history is truncated.
            summarize_threshold: History length at which _maybe_summarize()
                starts compressing.
            **kwargs: Forwarded to Actor.__init__.
        """
        super().__init__(**kwargs)
        self.llm = llm_provider
        self.system_prompt = system_prompt
        self.max_history = max_history
        self.summarize_threshold = summarize_threshold  # compress when history exceeds this
        self._conversation_history: list[dict] = []
        self._history_summary: str = ""   # rolling summary of compressed messages
        self._current_task = "idle"
        # Cost / token tracking — must be set here so subclasses (MainActor etc.) inherit them
        self.total_input_tokens  = 0
        self.total_output_tokens = 0
        self.total_cost_usd      = 0.0

    def _current_task_description(self) -> str:
        """Return the short text of the task in progress, or "idle"."""
        return self._current_task

    async def on_start(self):
        """Restore persisted history and summary, then publish the capability manifest.

        Restored entries are kept only if they are dicts with role "user" or
        "assistant" and non-blank content; non-string content is stringified.
        At most the last ``max_history`` entries are kept.
        """
        # Restore conversation history and rolling summary from persistence
        saved = self.recall("conversation_history", [])
        clean = []
        for m in saved:
            if not isinstance(m, dict):
                continue
            role    = m.get("role", "")
            content = m.get("content", "")
            if role not in ("user", "assistant"):
                continue
            if not isinstance(content, str):
                content = str(content)
            if content.strip():
                clean.append({"role": role, "content": content})
        self._conversation_history = clean[-self.max_history:]
        self._history_summary = self.recall("history_summary", "")

        # Publish capability manifest so main's topic registry knows this agent exists
        description = (
            getattr(self, "DESCRIPTION", None)
            or (self.__class__.__doc__ or "").strip().split("\n")[0]
            or self.name
        )
        capabilities  = getattr(self, "CAPABILITIES", [])
        input_schema  = getattr(self, "INPUT_SCHEMA",  {})
        output_schema = getattr(self, "OUTPUT_SCHEMA", {})
        await self.publish_manifest(
            description=description,
            capabilities=capabilities,
            input_schema=input_schema,
            output_schema=output_schema,
        )

    async def on_stop(self):
        """Persist the conversation history and rolling summary."""
        self.persist("conversation_history", self._conversation_history)
        self.persist("history_summary", self._history_summary)

    async def _maybe_summarize(self):
        """
        Compress history once it reaches summarize_threshold messages.

        The older half of the history is summarized by the LLM (together with
        any previous summary) into the rolling summary, and only the newer
        half is kept. Without an LLM, or when summarization fails, the history
        is simply truncated to the most recent max_history messages.
        The summary is prepended as context when sending to the LLM
        (see _build_messages_with_summary).
        """
        if len(self._conversation_history) < self.summarize_threshold:
            return
        if self.llm is None:
            # No LLM — just truncate
            self._conversation_history = self._conversation_history[-self.max_history:]
            return

        # Split: compress the older half, keep the recent half
        split = len(self._conversation_history) // 2
        to_compress = self._conversation_history[:split]
        to_keep     = self._conversation_history[split:]

        # Build compression prompt
        prior_summary = f"Previous summary:\n{self._history_summary}\n\n" if self._history_summary else ""
        messages_text = "\n".join(
            f"{m['role'].upper()}: {m['content'][:400]}"
            for m in to_compress
        )
        prompt = (
            f"{prior_summary}"
            f"Summarize the following conversation segment concisely. "
            f"Preserve: key facts, decisions, user preferences, entity names, URLs, credentials, "
            f"any technical details mentioned. Be specific, not vague.\n\n"
            f"{messages_text}"
        )
        try:
            summary, usage = await self.llm.complete(
                messages=[{"role": "user", "content": prompt}],
                system="You are a conversation summarizer. Output a dense, factual summary. No preamble.",
                max_tokens=400,
            )
            self.total_input_tokens  += usage.get("input_tokens", 0)
            self.total_output_tokens += usage.get("output_tokens", 0)
            self.total_cost_usd      += usage.get("cost_usd", 0.0)
            self._history_summary = summary.strip()
            self._conversation_history = to_keep
            self.persist("history_summary", self._history_summary)
            self.persist("conversation_history", self._conversation_history)
            logger.info(f"[{self.name}] History summarized: {len(to_compress)} messages → summary ({len(summary)} chars), keeping {len(to_keep)}")
        except Exception as e:
            logger.warning(f"[{self.name}] Summarization failed: {e} — truncating instead")
            self._conversation_history = self._conversation_history[-self.max_history:]

    def _build_messages_with_summary(self, n: int) -> list[dict]:
        """
        Build the message list to send to the LLM: the last ``n`` history
        messages, preceded by the rolling summary as context if one exists.
        """
        recent = self._conversation_history[-n:]
        if not self._history_summary:
            return recent
        # Inject summary as a user/assistant exchange so it fits the messages format
        summary_ctx = [{
            "role": "user",
            "content": f"[Context from earlier in our conversation]\n{self._history_summary}"
        }, {
            "role": "assistant",
            "content": "Understood, I have that context."
        }]
        return summary_ctx + recent

    async def handle_message(self, msg: Message):
        """Route TASK messages to _handle_task; every other type is ignored."""
        if msg.type == MessageType.TASK:
            await self._handle_task(msg)

    async def _handle_task(self, msg: Message):
        """Run one TASK message through the LLM and reply with a RESULT.

        The task text is taken from the payload ("text", "task", "message" or
        "query" for dict payloads, otherwise the stringified payload). On
        success the exchange is appended to history and persisted, a
        completion event is published, and the result is sent to the reply
        target. Failures are logged and counted; they do not propagate.
        """
        if isinstance(msg.payload, dict):
            # Accept "text", "task", "message", or fall back to JSON dump
            task_text = (
                msg.payload.get("text")
                or msg.payload.get("task")
                or msg.payload.get("message")
                or msg.payload.get("query")
                or str(msg.payload)
            )
        else:
            task_text = str(msg.payload) if msg.payload is not None else ""
        self._current_task = task_text[:60]

        if self.llm is None:
            logger.warning(f"[{self.name}] No LLM provider configured.")
            # This return bypasses the finally below, so reset here; otherwise
            # the agent would keep reporting the unprocessed task as current.
            self._current_task = "idle"
            return

        start = time.time()
        try:
            self._conversation_history.append({"role": "user", "content": task_text})

            response, usage = await self.llm.complete(
                messages=self._conversation_history[-self.max_history:],
                system=self.system_prompt,
            )

            # Track tokens/cost for task traffic too, as chat() and
            # chat_stream() do. Guarded so an unexpected usage value cannot
            # turn a successful completion into a failed task.
            if isinstance(usage, dict):
                self.total_input_tokens  += usage.get("input_tokens", 0)
                self.total_output_tokens += usage.get("output_tokens", 0)
                self.total_cost_usd      += usage.get("cost_usd", 0.0)

            self._conversation_history.append({"role": "assistant", "content": response})
            self.metrics.tasks_completed += 1
            duration = time.time() - start

            # Persist after each exchange
            self.persist("conversation_history", self._conversation_history)

            # Publish completion
            await self._mqtt_publish(
                f"agents/{self.actor_id}/completed",
                {
                    "result_preview": response[:200],
                    "duration": duration,
                    "task": task_text[:60],
                },
            )

            # Reply to sender — echo _task_id so send_to() futures resolve
            payload_dict = msg.payload if isinstance(msg.payload, dict) else {}
            task_id  = payload_dict.get("_task_id")
            reply_to = payload_dict.get("_reply_to") or msg.reply_to or msg.sender_id
            if reply_to:
                result = {"text": response, "task": task_text, "duration": duration}
                if task_id:
                    result["_task_id"] = task_id
                await self.send(reply_to, MessageType.RESULT, result)

        except Exception as e:
            self.metrics.tasks_failed += 1
            self.state_value = "failed_task"
            logger.error(f"[{self.name}] LLM task failed: {e}", exc_info=True)

        finally:
            self._current_task = "idle"

    async def chat(self, user_message: str) -> str:
        """Direct async call - useful for the main conversation actor.

        Appends the exchange to history, summarizes if needed, persists the
        history, accumulates token usage and publishes metrics. Returns the
        LLM response text, or "[No LLM configured]" when no provider is set.
        Errors raised by the provider propagate to the caller.
        """
        if self.llm is None:
            return "[No LLM configured]"

        self.metrics.messages_processed += 1
        self._conversation_history.append({"role": "user", "content": user_message})

        # Only well-formed user/assistant entries are sent; content is stringified.
        safe_history = [
            {"role": m["role"], "content": str(m["content"])}
            for m in self._build_messages_with_summary(self.max_history)
            if isinstance(m, dict)
            and m.get("role") in ("user", "assistant")
            and m.get("content") is not None
        ]
        response, usage = await self.llm.complete(
            messages=safe_history,
            system=self.system_prompt,
        )
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        # Accumulate token usage and cost
        self.total_input_tokens  += usage.get("input_tokens", 0)
        self.total_output_tokens += usage.get("output_tokens", 0)
        self.total_cost_usd      += usage.get("cost_usd", 0.0)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )
        return response

    async def chat_stream(self, user_message: str):
        """
        Streaming version of chat(). Yields text chunks, then a final usage dict.
        The caller is responsible for printing chunks as they arrive.

        When no LLM is set, or the provider has no ``stream`` attribute, the
        whole chat() response is yielded as a single chunk and no usage dict
        follows.

        Usage:
            async for chunk in agent.chat_stream("hello"):
                if isinstance(chunk, dict):
                    usage = chunk   # final usage summary
                else:
                    print(chunk, end="", flush=True)
        """
        if self.llm is None or not hasattr(self.llm, "stream"):
            # Fallback: non-streaming — yield whole response as single chunk
            response = await self.chat(user_message)
            yield response
            return

        self.metrics.messages_processed += 1
        self._conversation_history.append({"role": "user", "content": user_message})

        full_text = []
        usage     = {}

        safe_history = [
            {"role": m["role"], "content": str(m["content"])}
            for m in self._build_messages_with_summary(self.max_history)
            if isinstance(m, dict)
            and m.get("role") in ("user", "assistant")
            and m.get("content") is not None
        ]
        async for chunk in self.llm.stream(
            messages=safe_history,
            system=self.system_prompt,
        ):
            # Dict chunks carry usage; everything else is response text.
            if isinstance(chunk, dict):
                usage = chunk
            else:
                full_text.append(chunk)
                yield chunk

        response = "".join(full_text)
        self._conversation_history.append({"role": "assistant", "content": response})
        await self._maybe_summarize()
        self.persist("conversation_history", self._conversation_history)

        self.total_input_tokens  += usage.get("input_tokens", 0)
        self.total_output_tokens += usage.get("output_tokens", 0)
        self.total_cost_usd      += usage.get("cost_usd", 0.0)

        await self._mqtt_publish(
            f"agents/{self.actor_id}/metrics",
            self._build_metrics(),
        )

        # Yield final usage dict so caller can log it
        yield usage

    def _build_metrics(self) -> dict:
        """Extend the base metrics with cumulative token counts and cost (rounded to 6 decimals)."""
        m = super()._build_metrics()
        m["input_tokens"]  = self.total_input_tokens
        m["output_tokens"] = self.total_output_tokens
        m["cost_usd"]      = round(self.total_cost_usd, 6)
        return m

    def clear_history(self):
        """Forget the in-memory conversation: both the message list and the rolling summary.

        The summary is cleared as well because _build_messages_with_summary()
        would otherwise keep injecting the old context. Persisted state is not
        touched here.
        """
        self._conversation_history = []
        self._history_summary = ""

An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.

LLMAgent( llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, system_prompt: str = 'You are a helpful AI agent.', max_history: int = 20, summarize_threshold: int = 30, **kwargs)
471    def __init__(
472        self,
473        llm_provider: Optional[LLMProvider] = None,
474        system_prompt: str = "You are a helpful AI agent.",
475        max_history: int = 20,
476        summarize_threshold: int = 30,
477        **kwargs,
478    ):
479        super().__init__(**kwargs)
480        self.llm = llm_provider
481        self.system_prompt = system_prompt
482        self.max_history = max_history
483        self.summarize_threshold = summarize_threshold  # compress when history exceeds this
484        self._conversation_history: list[dict] = []
485        self._history_summary: str = ""   # rolling summary of compressed messages
486        self._current_task = "idle"
487        # Cost / token tracking — must be set here so subclasses (MainActor etc.) inherit them
488        self.total_input_tokens  = 0
489        self.total_output_tokens = 0
490        self.total_cost_usd      = 0.0
llm
system_prompt
max_history
summarize_threshold
total_input_tokens
total_output_tokens
total_cost_usd
async def on_start(self):
495    async def on_start(self):
496        # Restore conversation history and rolling summary from persistence
497        saved = self.recall("conversation_history", [])
498        clean = []
499        for m in saved:
500            if not isinstance(m, dict):
501                continue
502            role    = m.get("role", "")
503            content = m.get("content", "")
504            if role not in ("user", "assistant"):
505                continue
506            if not isinstance(content, str):
507                content = str(content)
508            if content.strip():
509                clean.append({"role": role, "content": content})
510        self._conversation_history = clean[-self.max_history:]
511        self._history_summary = self.recall("history_summary", "")
512
513        # Publish capability manifest so main's topic registry knows this agent exists
514        description = (
515            getattr(self, "DESCRIPTION", None)
516            or (self.__class__.__doc__ or "").strip().split("\n")[0]
517            or self.name
518        )
519        capabilities  = getattr(self, "CAPABILITIES", [])
520        input_schema  = getattr(self, "INPUT_SCHEMA",  {})
521        output_schema = getattr(self, "OUTPUT_SCHEMA", {})
522        await self.publish_manifest(
523            description=description,
524            capabilities=capabilities,
525            input_schema=input_schema,
526            output_schema=output_schema,
527        )

Called when actor starts. Override for init logic.

async def on_stop(self):
529    async def on_stop(self):
530        self.persist("conversation_history", self._conversation_history)
531        self.persist("history_summary", self._history_summary)

Called when actor stops. Override for cleanup.

async def handle_message(self, msg: Message):
601    async def handle_message(self, msg: Message):
602        if msg.type == MessageType.TASK:
603            await self._handle_task(msg)

Handle messages not caught by default handlers.

async def chat(self, user_message: str) -> str:
667    async def chat(self, user_message: str) -> str:
668        """Direct async call - useful for the main conversation actor."""
669        if self.llm is None:
670            return "[No LLM configured]"
671
672        self.metrics.messages_processed += 1
673        self._conversation_history.append({"role": "user", "content": user_message})
674
675        safe_history = [
676            {"role": m["role"], "content": str(m["content"])}
677            for m in self._build_messages_with_summary(self.max_history)
678            if isinstance(m, dict)
679            and m.get("role") in ("user", "assistant")
680            and m.get("content") is not None
681        ]
682        response, usage = await self.llm.complete(
683            messages=safe_history,
684            system=self.system_prompt,
685        )
686        self._conversation_history.append({"role": "assistant", "content": response})
687        await self._maybe_summarize()
688        self.persist("conversation_history", self._conversation_history)
689
690        # Accumulate token usage and cost
691        self.total_input_tokens  += usage.get("input_tokens", 0)
692        self.total_output_tokens += usage.get("output_tokens", 0)
693        self.total_cost_usd      += usage.get("cost_usd", 0.0)
694
695        await self._mqtt_publish(
696            f"agents/{self.actor_id}/metrics",
697            self._build_metrics(),
698        )
699        return response

Direct async call - useful for the main conversation actor.

async def chat_stream(self, user_message: str):
701    async def chat_stream(self, user_message: str):
702        """
703        Streaming version of chat(). Yields text chunks, then a final usage dict.
704        The caller is responsible for printing chunks as they arrive.
705
706        Usage:
707            async for chunk in agent.chat_stream("hello"):
708                if isinstance(chunk, dict):
709                    usage = chunk   # final usage summary
710                else:
711                    print(chunk, end="", flush=True)
712        """
713        if self.llm is None or not hasattr(self.llm, "stream"):
714            # Fallback: non-streaming — yield whole response as single chunk
715            response = await self.chat(user_message)
716            yield response
717            return
718
719        self.metrics.messages_processed += 1
720        self._conversation_history.append({"role": "user", "content": user_message})
721
722        full_text = []
723        usage     = {}
724
725        safe_history = [
726            {"role": m["role"], "content": str(m["content"])}
727            for m in self._build_messages_with_summary(self.max_history)
728            if isinstance(m, dict)
729            and m.get("role") in ("user", "assistant")
730            and m.get("content") is not None
731        ]
732        async for chunk in self.llm.stream(
733            messages=safe_history,
734            system=self.system_prompt,
735        ):
736            if isinstance(chunk, dict):
737                usage = chunk
738            else:
739                full_text.append(chunk)
740                yield chunk
741
742        response = "".join(full_text)
743        self._conversation_history.append({"role": "assistant", "content": response})
744        await self._maybe_summarize()
745        self.persist("conversation_history", self._conversation_history)
746
747        self.total_input_tokens  += usage.get("input_tokens", 0)
748        self.total_output_tokens += usage.get("output_tokens", 0)
749        self.total_cost_usd      += usage.get("cost_usd", 0.0)
750
751        await self._mqtt_publish(
752            f"agents/{self.actor_id}/metrics",
753            self._build_metrics(),
754        )
755
756        # Yield final usage dict so caller can log it
757        yield usage

Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.

Usage: async for chunk in agent.chat_stream("hello"): if isinstance(chunk, dict): usage = chunk # final usage summary else: print(chunk, end="", flush=True)

def clear_history(self):
766    def clear_history(self):
767        self._conversation_history = []
class AnthropicProvider(wactorz.agents.llm_agent.LLMProvider):
 84class AnthropicProvider(LLMProvider):
 85    def __init__(self, model: str = "claude-sonnet-4-6", api_key: Optional[str] = None):
 86        import anthropic
 87        self.client = anthropic.AsyncAnthropic(api_key=api_key)
 88        self.model = model
 89
 90    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
 91        response = await self.client.messages.create(
 92            model=self.model,
 93            max_tokens=kwargs.get("max_tokens", 4096),
 94            system=system,
 95            messages=messages,
 96        )
 97        text = response.content[0].text
 98        usage = {
 99            "input_tokens":  response.usage.input_tokens,
100            "output_tokens": response.usage.output_tokens,
101            "cost_usd":      _calc_cost(self.model,
102                                        response.usage.input_tokens,
103                                        response.usage.output_tokens),
104        }
105        return text, usage
106
107    async def stream(self, messages: list[dict], system: str = "", **kwargs):
108        """Yield text chunks as they arrive. Final item is a dict with usage."""
109        input_tokens = output_tokens = 0
110        async with self.client.messages.stream(
111            model=self.model,
112            max_tokens=kwargs.get("max_tokens", 4096),
113            system=system,
114            messages=messages,
115        ) as s:
116            async for chunk in s.text_stream:
117                yield chunk
118            # Final message has usage counts
119            final = await s.get_final_message()
120            input_tokens  = final.usage.input_tokens
121            output_tokens = final.usage.output_tokens
122        yield {
123            "input_tokens":  input_tokens,
124            "output_tokens": output_tokens,
125            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
126        }

Base class for LLM providers.

AnthropicProvider(model: str = 'claude-sonnet-4-6', api_key: Optional[str] = None)
85    def __init__(self, model: str = "claude-sonnet-4-6", api_key: Optional[str] = None):
86        import anthropic
87        self.client = anthropic.AsyncAnthropic(api_key=api_key)
88        self.model = model
client
model
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
 90    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
 91        response = await self.client.messages.create(
 92            model=self.model,
 93            max_tokens=kwargs.get("max_tokens", 4096),
 94            system=system,
 95            messages=messages,
 96        )
 97        text = response.content[0].text
 98        usage = {
 99            "input_tokens":  response.usage.input_tokens,
100            "output_tokens": response.usage.output_tokens,
101            "cost_usd":      _calc_cost(self.model,
102                                        response.usage.input_tokens,
103                                        response.usage.output_tokens),
104        }
105        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
107    async def stream(self, messages: list[dict], system: str = "", **kwargs):
108        """Yield text chunks as they arrive. Final item is a dict with usage."""
109        input_tokens = output_tokens = 0
110        async with self.client.messages.stream(
111            model=self.model,
112            max_tokens=kwargs.get("max_tokens", 4096),
113            system=system,
114            messages=messages,
115        ) as s:
116            async for chunk in s.text_stream:
117                yield chunk
118            # Final message has usage counts
119            final = await s.get_final_message()
120            input_tokens  = final.usage.input_tokens
121            output_tokens = final.usage.output_tokens
122        yield {
123            "input_tokens":  input_tokens,
124            "output_tokens": output_tokens,
125            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
126        }

Yield text chunks as they arrive. Final item is a dict with usage.

class OpenAIProvider(wactorz.agents.llm_agent.LLMProvider):
class OpenAIProvider(LLMProvider):
    """LLM provider that talks to OpenAI chat completions through ``openai.AsyncOpenAI``."""

    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
        # Imported inside the constructor, so `openai` is only needed once a
        # provider is actually created.
        import openai
        self.client = openai.AsyncOpenAI(api_key=api_key)
        self.model = model

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """Run one non-streaming chat completion.

        Reads ``max_tokens`` (default 4096) and an optional
        ``reasoning_effort`` from ``kwargs``. If the request fails with an
        error that mentions ``reasoning_effort``, it is retried once without
        that parameter.

        Returns:
            (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.
            ``text`` is "" when the reply carries no text content.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        params = {
            "model": self.model,
            "messages": full_messages,
            "max_completion_tokens": kwargs.get("max_tokens", 4096),
        }
        reasoning_effort = kwargs.get("reasoning_effort")
        if reasoning_effort:
            params["reasoning_effort"] = reasoning_effort
        try:
            response = await self.client.chat.completions.create(**params)
        except Exception as exc:
            if reasoning_effort and "reasoning_effort" in str(exc):
                params.pop("reasoning_effort", None)
                response = await self.client.chat.completions.create(**params)
            else:
                raise
        # `message.content` can be null; keep the declared `str` contract.
        text = response.choices[0].message.content or ""
        # Guard against a missing usage object, as NIMProvider.complete does.
        input_tok  = response.usage.prompt_tokens     if response.usage else 0
        output_tok = response.usage.completion_tokens if response.usage else 0
        usage = {
            "input_tokens":  input_tok,
            "output_tokens": output_tok,
            "cost_usd":      _calc_cost(self.model, input_tok, output_tok),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage.

        Uses the same ``max_tokens`` / ``reasoning_effort`` handling and
        one-shot retry as ``complete``.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        input_tokens = output_tokens = 0
        params = {
            "model": self.model,
            "messages": full_messages,
            "max_completion_tokens": kwargs.get("max_tokens", 4096),
            "stream": True,
            # Ask for token usage to be included in the stream.
            "stream_options": {"include_usage": True},
        }
        reasoning_effort = kwargs.get("reasoning_effort")
        if reasoning_effort:
            params["reasoning_effort"] = reasoning_effort
        try:
            stream = await self.client.chat.completions.create(**params)
        except Exception as exc:
            if reasoning_effort and "reasoning_effort" in str(exc):
                params.pop("reasoning_effort", None)
                stream = await self.client.chat.completions.create(**params)
            else:
                raise
        async with stream as s:
            async for chunk in s:
                # Chunks without choices carry no text delta.
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    yield delta
                if chunk.usage:
                    input_tokens  = chunk.usage.prompt_tokens
                    output_tokens = chunk.usage.completion_tokens
        yield {
            "input_tokens":  input_tokens,
            "output_tokens": output_tokens,
            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
        }

Base class for LLM providers.

OpenAIProvider(model: str = 'gpt-4o', api_key: Optional[str] = None)
130    def __init__(self, model: str = "gpt-4o", api_key: Optional[str] = None):
131        import openai
132        self.client = openai.AsyncOpenAI(api_key=api_key)
133        self.model = model
client
model
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
135    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
136        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
137        params = {
138            "model": self.model,
139            "messages": full_messages,
140            "max_completion_tokens": kwargs.get("max_tokens", 4096),
141        }
142        reasoning_effort = kwargs.get("reasoning_effort")
143        if reasoning_effort:
144            params["reasoning_effort"] = reasoning_effort
145        try:
146            response = await self.client.chat.completions.create(**params)
147        except Exception as exc:
148            if reasoning_effort and "reasoning_effort" in str(exc):
149                params.pop("reasoning_effort", None)
150                response = await self.client.chat.completions.create(**params)
151            else:
152                raise
153        text = response.choices[0].message.content
154        usage = {
155            "input_tokens":  response.usage.prompt_tokens,
156            "output_tokens": response.usage.completion_tokens,
157            "cost_usd":      _calc_cost(self.model,
158                                        response.usage.prompt_tokens,
159                                        response.usage.completion_tokens),
160        }
161        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
163    async def stream(self, messages: list[dict], system: str = "", **kwargs):
164        """Yield text chunks as they arrive. Final item is a dict with usage."""
165        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
166        input_tokens = output_tokens = 0
167        params = {
168            "model": self.model,
169            "messages": full_messages,
170            "max_completion_tokens": kwargs.get("max_tokens", 4096),
171            "stream": True,
172            "stream_options": {"include_usage": True},
173        }
174        reasoning_effort = kwargs.get("reasoning_effort")
175        if reasoning_effort:
176            params["reasoning_effort"] = reasoning_effort
177        try:
178            stream = await self.client.chat.completions.create(**params)
179        except Exception as exc:
180            if reasoning_effort and "reasoning_effort" in str(exc):
181                params.pop("reasoning_effort", None)
182                stream = await self.client.chat.completions.create(**params)
183            else:
184                raise
185        async with stream as s:
186            async for chunk in s:
187                delta = chunk.choices[0].delta.content if chunk.choices else None
188                if delta:
189                    yield delta
190                if chunk.usage:
191                    input_tokens  = chunk.usage.prompt_tokens
192                    output_tokens = chunk.usage.completion_tokens
193        yield {
194            "input_tokens":  input_tokens,
195            "output_tokens": output_tokens,
196            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
197        }

Yield text chunks as they arrive. Final item is a dict with usage.

class OllamaProvider(wactorz.agents.llm_agent.LLMProvider):
class OllamaProvider(LLMProvider):
    """Local LLM via Ollama."""

    def __init__(self, model: str = "llama3", base_url: str = "http://localhost:11434"):
        self.model = model
        self.base_url = base_url

    @staticmethod
    def _chat_messages(messages: list[dict], system: str = "") -> list[dict]:
        """Return a copy of *messages*, prefixed with a system turn when *system* is non-empty."""
        if not system:
            return list(messages)
        return [{"role": "system", "content": system}] + list(messages)

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """POST one non-streaming chat request to ``{base_url}/api/chat``.

        Returns:
            (text, usage) where usage = {input_tokens, output_tokens, cost_usd};
            ``cost_usd`` is always 0.0 for this provider.

        Raises:
            aiohttp.ClientResponseError: if the server answers with an HTTP
                error status.
        """
        import aiohttp
        payload = {
            "model": self.model,
            "messages": self._chat_messages(messages, system),
            "stream": False,
        }
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                # Surface HTTP failures directly instead of a confusing
                # KeyError on the missing "message" field below.
                resp.raise_for_status()
                data = await resp.json()
        text = data["message"]["content"]
        prompt_eval = data.get("prompt_eval_count", 0)
        eval_count  = data.get("eval_count", 0)
        usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage.

        The response body is read line by line, each line parsed as one JSON
        object. Lines that are not valid JSON are skipped.

        Raises:
            aiohttp.ClientResponseError: if the server answers with an HTTP
                error status.
        """
        import json as _json

        import aiohttp
        payload = {
            "model": self.model,
            "messages": self._chat_messages(messages, system),
            "stream": True,
        }
        input_tokens = output_tokens = 0
        async with aiohttp.ClientSession() as session:
            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
                # Fail loudly on an HTTP error instead of yielding nothing.
                resp.raise_for_status()
                async for raw in resp.content:
                    if not raw.strip():
                        continue
                    try:
                        data = _json.loads(raw)
                    except ValueError:
                        # JSONDecodeError / UnicodeDecodeError: skip the bad
                        # line, but let unrelated errors propagate.
                        continue
                    delta = (data.get("message") or {}).get("content", "")
                    if delta:
                        yield delta
                    if data.get("done"):
                        # Token counts are read from the object flagged "done".
                        input_tokens  = data.get("prompt_eval_count", 0)
                        output_tokens = data.get("eval_count", 0)
        yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}

Local LLM via Ollama.

OllamaProvider(model: str = 'llama3', base_url: str = 'http://localhost:11434')
202    def __init__(self, model: str = "llama3", base_url: str = "http://localhost:11434"):
203        self.model = model
204        self.base_url = base_url
model
base_url
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
212    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
213        import aiohttp
214        payload = {
215            "model": self.model,
216            "messages": self._chat_messages(messages, system),
217            "stream": False,
218        }
219        async with aiohttp.ClientSession() as session:
220            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
221                data = await resp.json()
222        text = data["message"]["content"]
223        prompt_eval = data.get("prompt_eval_count", 0)
224        eval_count  = data.get("eval_count", 0)
225        usage = {"input_tokens": prompt_eval, "output_tokens": eval_count, "cost_usd": 0.0}
226        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
228    async def stream(self, messages: list[dict], system: str = "", **kwargs):
229        """Yield text chunks as they arrive. Final item is a dict with usage."""
230        import aiohttp, json as _json
231        payload = {
232            "model": self.model,
233            "messages": self._chat_messages(messages, system),
234            "stream": True,
235        }
236        input_tokens = output_tokens = 0
237        async with aiohttp.ClientSession() as session:
238            async with session.post(f"{self.base_url}/api/chat", json=payload) as resp:
239                async for raw in resp.content:
240                    if not raw.strip():
241                        continue
242                    try:
243                        data = _json.loads(raw)
244                    except Exception:
245                        continue
246                    delta = (data.get("message") or {}).get("content", "")
247                    if delta:
248                        yield delta
249                    if data.get("done"):
250                        input_tokens  = data.get("prompt_eval_count", 0)
251                        output_tokens = data.get("eval_count", 0)
252        yield {"input_tokens": input_tokens, "output_tokens": output_tokens, "cost_usd": 0.0}

Yield text chunks as they arrive. Final item is a dict with usage.

class NIMProvider(wactorz.agents.llm_agent.LLMProvider):
class NIMProvider(LLMProvider):
    """
    NVIDIA NIM — OpenAI-compatible API hosted at integrate.api.nvidia.com.
    Free tier: 1000 requests/month per model. No local GPU required.

    Popular free models:
      meta/llama-3.1-8b-instruct          — fast, lightweight
      meta/llama-3.3-70b-instruct         — strong general purpose
      mistralai/mistral-7b-instruct-v0.3  — fast & capable
      mistralai/mixtral-8x7b-instruct-v0.1
      google/gemma-3-27b-it
      microsoft/phi-3-mini-128k-instruct
      deepseek-ai/deepseek-r1             — reasoning model
      deepseek-ai/deepseek-r1-distill-qwen-7b
      nvidia/llama-3.1-nemotron-70b-instruct
      nvidia/llama-3.3-nemotron-super-49b-v1

    Get a free API key at: https://build.nvidia.com
    """

    NIM_BASE_URL = "https://integrate.api.nvidia.com/v1"

    def __init__(
        self,
        model:    str = "meta/llama-3.3-70b-instruct",
        api_key:  Optional[str] = None,
        base_url: str = NIM_BASE_URL,
    ):
        # Imported inside the constructor, so `openai` is only needed once a
        # provider is actually created.
        import openai
        self.model  = model
        self.client = openai.AsyncOpenAI(
            api_key=api_key or "dummy",   # NIM free tier may not require a key locally
            base_url=base_url,
        )

    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
        """Run one non-streaming chat completion.

        Only ``max_tokens`` (default 4096) is read from ``kwargs``.

        Returns:
            (text, usage) where usage = {input_tokens, output_tokens, cost_usd}.
            ``text`` is "" when the reply carries no text content; token
            counts are 0 when the response has no usage object.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        response = await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_tokens=kwargs.get("max_tokens", 4096),
        )
        # `message.content` can be null; keep the declared `str` contract.
        text = response.choices[0].message.content or ""
        input_tok  = response.usage.prompt_tokens     if response.usage else 0
        output_tok = response.usage.completion_tokens if response.usage else 0
        usage = {
            "input_tokens":  input_tok,
            "output_tokens": output_tok,
            "cost_usd":      _calc_cost(self.model, input_tok, output_tok),
        }
        return text, usage

    async def stream(self, messages: list[dict], system: str = "", **kwargs):
        """Yield text chunks as they arrive. Final item is a dict with usage.

        Token counts stay 0 unless a streamed chunk carries a usage object.
        """
        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
        input_tokens = output_tokens = 0
        async with await self.client.chat.completions.create(
            model=self.model,
            messages=full_messages,
            max_tokens=kwargs.get("max_tokens", 4096),
            stream=True,
        ) as s:
            async for chunk in s:
                # Chunks without choices carry no text delta.
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    yield delta
                if chunk.usage:
                    input_tokens  = chunk.usage.prompt_tokens
                    output_tokens = chunk.usage.completion_tokens
        yield {
            "input_tokens":  input_tokens,
            "output_tokens": output_tokens,
            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
        }

NVIDIA NIM — OpenAI-compatible API hosted at integrate.api.nvidia.com. Free tier: 1000 requests/month per model. No local GPU required.

Popular free models: meta/llama-3.1-8b-instruct — fast, lightweight meta/llama-3.3-70b-instruct — strong general purpose mistralai/mistral-7b-instruct-v0.3 — fast & capable mistralai/mixtral-8x7b-instruct-v0.1 google/gemma-3-27b-it microsoft/phi-3-mini-128k-instruct deepseek-ai/deepseek-r1 — reasoning model deepseek-ai/deepseek-r1-distill-qwen-7b nvidia/llama-3.1-nemotron-70b-instruct nvidia/llama-3.3-nemotron-super-49b-v1

Get a free API key at: https://build.nvidia.com

NIMProvider( model: str = 'meta/llama-3.3-70b-instruct', api_key: Optional[str] = None, base_url: str = 'https://integrate.api.nvidia.com/v1')
277    def __init__(
278        self,
279        model:    str = "meta/llama-3.3-70b-instruct",
280        api_key:  Optional[str] = None,
281        base_url: str = NIM_BASE_URL,
282    ):
283        import openai
284        self.model  = model
285        self.client = openai.AsyncOpenAI(
286            api_key=api_key or "dummy",   # NIM free tier may not require a key locally
287            base_url=base_url,
288        )
NIM_BASE_URL = 'https://integrate.api.nvidia.com/v1'
model
client
async def complete( self, messages: list[dict], system: str = '', **kwargs) -> tuple[str, dict]:
290    async def complete(self, messages: list[dict], system: str = "", **kwargs) -> tuple[str, dict]:
291        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
292        response = await self.client.chat.completions.create(
293            model=self.model,
294            messages=full_messages,
295            max_tokens=kwargs.get("max_tokens", 4096),
296        )
297        text = response.choices[0].message.content
298        input_tok  = response.usage.prompt_tokens     if response.usage else 0
299        output_tok = response.usage.completion_tokens if response.usage else 0
300        usage = {
301            "input_tokens":  input_tok,
302            "output_tokens": output_tok,
303            "cost_usd":      _calc_cost(self.model, input_tok, output_tok),
304        }
305        return text, usage

Returns (text, usage) where usage = {input_tokens, output_tokens, cost_usd}

async def stream(self, messages: list[dict], system: str = '', **kwargs):
307    async def stream(self, messages: list[dict], system: str = "", **kwargs):
308        """Yield text chunks as they arrive. Final item is a dict with usage."""
309        full_messages = ([{"role": "system", "content": system}] if system else []) + messages
310        input_tokens = output_tokens = 0
311        async with await self.client.chat.completions.create(
312            model=self.model,
313            messages=full_messages,
314            max_tokens=kwargs.get("max_tokens", 4096),
315            stream=True,
316        ) as s:
317            async for chunk in s:
318                delta = chunk.choices[0].delta.content if chunk.choices else None
319                if delta:
320                    yield delta
321                if chunk.usage:
322                    input_tokens  = chunk.usage.prompt_tokens
323                    output_tokens = chunk.usage.completion_tokens
324        yield {
325            "input_tokens":  input_tokens,
326            "output_tokens": output_tokens,
327            "cost_usd":      _calc_cost(self.model, input_tokens, output_tokens),
328        }

Yield text chunks as they arrive. Final item is a dict with usage.

class MainActor(wactorz.LLMAgent):
 765class MainActor(LLMAgent):
    # Human-readable summary and capability tags of this agent.
    # NOTE(review): presumably consumed by the base class / agent manifest — confirm.
    DESCRIPTION  = "Main orchestrator: spawns agents, routes tasks, manages the multi-agent system"
    CAPABILITIES = ["spawn_agent", "list_agents", "list_nodes", "list_topics", "orchestration"]

    # System prompt for the routing classifier. _classify_intent() passes it as
    # the `system` argument and accepts only ACTUATE, HA, PIPELINE or OTHER as
    # the first token of the reply; anything else is treated as OTHER.
    INTENT_CLASSIFIER_PROMPT = (
        "You are a routing classifier for a smart home AI assistant.\n"
        "Respond with exactly one token: ACTUATE, HA, PIPELINE, or OTHER.\n\n"
        "ACTUATE = immediate one-shot device control in Home Assistant:\n"
        "  - Turn on/off a device right now\n"
        "  - Set temperature, dim lights, lock/unlock door\n"
        "  - Open/close covers or blinds right now\n"
        "  - Any direct command whose whole purpose is immediate device control\n\n"
        "HA = Home Assistant management, listing, or automation CRUD:\n"
        "  - List devices, areas, entities, automations\n"
        "  - Create/edit/delete a HA automation\n"
        "  - Query what devices or automations exist\n\n"
        "PIPELINE = a reactive rule that should run continuously:\n"
        "  - 'if X happens then do Y' — any conditional/reactive logic\n"
        "  - 'when X send me a message/notification'\n"
        "  - 'whenever X turns on/off do Y'\n"
        "  - Any rule involving a sensor state change triggering an action or notification\n"
        "  - Any webcam/camera detection triggering anything\n"
        "  - Anything involving Discord/Telegram notifications triggered by an event\n\n"
        "OTHER = general conversation, coding, questions, or mixed requests.anything not HA or pipeline related.\n\n"
        "Important:\n"
        "- Choose ACTUATE only when the entire request is immediate device control.\n"
        "- If the request mixes device control with non-HA tasks, return OTHER.\n"
        "- If the request is about automations, listing, discovery, or CRUD, return HA."
    )
 794
 795    def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs):
 796        kwargs.setdefault("name", "main")
 797        kwargs.setdefault("system_prompt", ORCHESTRATOR_PROMPT)
 798        super().__init__(llm_provider=llm_provider, **kwargs)
 799        self._result_futures: dict[str, asyncio.Future] = {}
 800        # Queued monitor notifications — prepended to next user response
 801        self._pending_notifications: list[dict] = []
 802        self.protected = True
 803        # Remote node tracking: node_name → {"last_seen": float, "agents": [...]}
 804        self._known_nodes: dict[str, dict] = {}
 805        # Topic registry: topic → [manifest, ...] — built from agents/+/manifest
 806        self._topic_registry: dict[str, list] = {}  # topic → list of agent manifests
 807        self._agent_manifests: dict[str, dict] = {}  # agent name → latest manifest (includes schemas)
 808
 809    # ── Lifecycle ──────────────────────────────────────────────────────────
 810
 811    async def on_start(self):
 812        await super().on_start()
 813        await self._restore_spawned_agents()
 814        # Listen for remote node heartbeats so we know what's online
 815        self._tasks.append(asyncio.create_task(self._node_heartbeat_listener()))
 816        # Listen for agent capability manifests to build topic registry
 817        self._tasks.append(asyncio.create_task(self._manifest_listener()))
 818        # Inject persisted user facts into system prompt
 819        self._inject_user_facts_into_prompt()
 820
 821    # ── Spawn registry ─────────────────────────────────────────────────────
 822
 823    def _get_spawn_registry(self) -> dict:
 824        return self.recall(SPAWN_REGISTRY_KEY) or {}
 825
 826    def _save_to_spawn_registry(self, config: dict):
 827        reg = self._get_spawn_registry()
 828        reg[config["name"]] = config
 829        self.persist(SPAWN_REGISTRY_KEY, reg)
 830        logger.info(f"[{self.name}] Spawn registry: {list(reg.keys())}")
 831
 832    def _remove_from_spawn_registry(self, name: str):
 833        reg = self._get_spawn_registry()
 834        if name in reg:
 835            del reg[name]
 836            self.persist(SPAWN_REGISTRY_KEY, reg)
 837            logger.info(f"[{self.name}] Removed '{name}' from spawn registry.")
 838
 839    # ── Pipeline rules registry ────────────────────────────────────────────
 840    # Stores grouped rules: one entry per user request, listing all agents spawned for it.
 841    # Schema: { rule_id: { "rule_id", "task", "agents": [str], "created_at": float } }
 842
 843    def get_pipeline_rules(self) -> dict:
 844        return self.recall(PIPELINE_RULES_KEY) or {}
 845
 846    def save_pipeline_rule(self, rule: dict):
 847        rules = self.get_pipeline_rules()
 848        rules[rule["rule_id"]] = rule
 849        self.persist(PIPELINE_RULES_KEY, rules)
 850        logger.info(f"[{self.name}] Pipeline rule saved: {rule['rule_id']} agents={rule.get('agents', [])}")
 851
 852    def get_notification_urls(self) -> dict:
 853        """Return persisted notification webhook URLs (discord, telegram, slack, etc.)"""
 854        return self.recall("_notification_urls") or {}
 855
 856    # ── User facts ─────────────────────────────────────────────────────────
 857    # Key facts extracted from conversation: HA URL, entity names, preferences,
 858    # user name, webhook URLs, etc. Stored separately from history so they
 859    # survive summarization and persist indefinitely.
 860
    # System prompt for the fact-extraction call in _extract_and_save_facts().
    # The reply must be a bare JSON object; its keys are merged into the
    # persisted "_user_facts" dict.
    _FACTS_EXTRACT_PROMPT = (
        "Extract durable facts from this conversation exchange that would be useful to remember "
        "long-term. Focus on: names, locations, device entity IDs, URLs, credentials, preferences, "
        "configurations, and any explicit statements about the user's setup.\n"
        "Return a JSON object with short descriptive keys and concise values. "
        "Return {} if nothing worth remembering was said.\n"
        "Example: {\"ha_url\": \"http://192.168.1.10:8123\", \"user_name\": \"Alex\", "
        "\"living_room_light\": \"light.wiz_rgbw_tunable_02cba0\"}\n"
        "Output only valid JSON. No explanation, no markdown."
    )
 871
 872    def get_user_facts(self) -> dict:
 873        return self.recall("_user_facts") or {}
 874
 875    def _inject_user_facts_into_prompt(self):
 876        """Prepend known user facts to the system prompt so the LLM always has them."""
 877        facts = self.get_user_facts()
 878        if not facts:
 879            return
 880        facts_lines = "\n".join(f"  {k}: {v}" for k, v in facts.items())
 881        facts_block = f"\n\n== KNOWN USER FACTS (always keep in mind) ==\n{facts_lines}"
 882        # Avoid duplicating if already injected
 883        marker = "== KNOWN USER FACTS"
 884        base_prompt = ORCHESTRATOR_PROMPT
 885        if marker in self.system_prompt:
 886            # Replace existing facts block
 887            self.system_prompt = base_prompt + facts_block
 888        else:
 889            self.system_prompt = self.system_prompt + facts_block
 890
 891    async def _extract_and_save_facts(self, user_message: str, assistant_response: str):
 892        """After each exchange, ask the LLM to extract any new durable facts."""
 893        if self.llm is None:
 894            return
 895        exchange = f"USER: {user_message[:600]}\nASSISTANT: {assistant_response[:600]}"
 896        try:
 897            raw, _ = await self.llm.complete(
 898                messages=[{"role": "user", "content": exchange}],
 899                system=self._FACTS_EXTRACT_PROMPT,
 900                max_tokens=200,
 901            )
 902            import json as _json, re as _re
 903            clean = raw.strip().lstrip("```json").lstrip("```").rstrip("```").strip()
 904            new_facts = _json.loads(clean)
 905            if not isinstance(new_facts, dict) or not new_facts:
 906                return
 907            # Merge with existing facts
 908            facts = self.get_user_facts()
 909            facts.update(new_facts)
 910            self.persist("_user_facts", facts)
 911            self._inject_user_facts_into_prompt()
 912            logger.info(f"[{self.name}] User facts updated: {list(new_facts.keys())}")
 913        except Exception as e:
 914            logger.info(f"[{self.name}] Facts extraction skipped: {e}")
 915
 916    async def delete_pipeline_rule(self, rule_id: str) -> str:
 917        """Stop all agents for a rule and remove it from registry."""
 918        rules = self.get_pipeline_rules()
 919        rule = rules.get(rule_id)
 920        if not rule:
 921            return f"No rule found with id '{rule_id}'."
 922        agents = rule.get("agents", [])
 923        stopped = []
 924        for agent_name in agents:
 925            self._remove_from_spawn_registry(agent_name)
 926            if self._registry:
 927                actor = self._registry.find_by_name(agent_name)
 928                if actor:
 929                    await actor.stop()
 930                    await self._registry.unregister(actor.actor_id)
 931                    stopped.append(agent_name)
 932        del rules[rule_id]
 933        self.persist(PIPELINE_RULES_KEY, rules)
 934        task_preview = rule.get("task", "")[:60]
 935        return f"Rule '{rule_id}' deleted. Stopped agents: {', '.join(stopped) or 'none running'}.\nRule was: {task_preview}"
 936
 937    async def _restore_spawned_agents(self):
 938        reg = self._get_spawn_registry()
 939        if not reg:
 940            return
 941        logger.info(f"[{self.name}] Restoring {len(reg)} agent(s): {list(reg.keys())}")
 942        for name, config in reg.items():
 943            node = config.get("node", "").strip()
 944            if node:
 945                # Remote agent — re-publish spawn to its node; no local object expected
 946                logger.info(f"[{self.name}] Re-spawning remote agent '{name}' on node '{node}'")
 947                try:
 948                    await self._spawn_remote(config, node, save=False)
 949                except Exception as e:
 950                    logger.error(f"[{self.name}] Failed to restore remote '{name}' on '{node}': {e}")
 951                continue
 952            if self._registry and self._registry.find_by_name(name):
 953                logger.info(f"[{self.name}] '{name}' already running, skipping.")
 954                continue
 955            try:
 956                await self._spawn_from_config(config, save=False)
 957                logger.info(f"[{self.name}] Restored: {name}")
 958            except Exception as e:
 959                logger.error(f"[{self.name}] Failed to restore '{name}': {e}")
 960
 961    # ── Message handling ───────────────────────────────────────────────────
 962
 963    async def handle_message(self, msg: Message):
 964        if msg.type == MessageType.TASK:
 965            # Intercept monitor notifications BEFORE passing to LLM _handle_task
 966            if isinstance(msg.payload, dict) and msg.payload.get("_monitor_notification"):
 967                self._pending_notifications.append(msg.payload)
 968                logger.info(f"[{self.name}] Monitor alert queued: {msg.payload.get('message','')[:80]}")
 969                return
 970            await self._handle_task(msg)
 971
 972        elif msg.type == MessageType.RESULT:
 973            if isinstance(msg.payload, dict):
 974                # Support both key names: "_task_id" (new) and "task" (legacy)
 975                fid = msg.payload.get("_task_id") or msg.payload.get("task")
 976                if fid and fid in self._result_futures:
 977                    fut = self._result_futures[fid]
 978                    if not fut.done():
 979                        fut.set_result(msg.payload)
 980
 981    # ── Home Automation intent detection ───────────────────────────────────
 982
 983    @staticmethod
 984    def _looks_like_home_automation_request(text: str) -> bool:
 985        lowered = (text or "").lower()
 986        if "home assistant" in lowered:
 987            return True
 988        if lowered.startswith("spawn ") or lowered.startswith("/"):
 989            return False
 990
 991        # Wactorz pipeline requests — these involve external sensors/agents, not HA natively
 992        # Route to planner instead of HA agent
 993        _pipeline_keywords = [
 994            "camera", "webcam", "yolo", "detect", "detection", "person detect",
 995            "object detect", "laptop camera", "cv2", "opencv",
 996            "when detected", "if detected", "whenever detected",
 997            "notify me", "send me a message", "send me a discord",
 998            "discord", "telegram", "whatsapp",
 999        ]
1000        if any(kw in lowered for kw in _pipeline_keywords):
1001            return False
1002
1003        has_trigger = any(token in lowered for token in [
1004            "when ", "if ", "on ", "whenever ", "after ", "before ",
1005            "as soon as ", "at ",
1006        ])
1007        has_action = any(token in lowered for token in [
1008            "turn on", "turn off", "open", "close", "lock", "unlock", "dim", "set",
1009        ])
1010        has_automation_intent = any(token in lowered for token in [
1011            "automate", "automation", "routine", "scene", "trigger", "schedule",
1012            "presence", "motion", "door", "window", "sensor", "alarm",
1013            "romantic", "cozy", "ambience", "ambiance",
1014        ])
1015        has_home_context = any(token in lowered for token in [
1016            "home", "house", "apartment", "room", "living room", "bedroom",
1017            "kitchen", "hallway", "garage", "porch",
1018        ])
1019
1020        return (
1021            (has_trigger and has_action)
1022            or (has_trigger and has_automation_intent)
1023            or (has_automation_intent and has_home_context)
1024        )
1025
1026    async def _classify_intent(self, text: str) -> str:
1027        """
1028        Classify user intent as ACTUATE, HA, PIPELINE, or OTHER using a single cheap LLM call.
1029        Returns one of: 'ACTUATE', 'HA', 'PIPELINE', 'OTHER'
1030        """
1031        if not text or text.startswith("/"):
1032            return "OTHER"
1033        if self.llm is None:
1034            return "OTHER"
1035        try:
1036            decision, _ = await asyncio.wait_for(
1037                self.llm.complete(
1038                    messages=[{"role": "user", "content": text}],
1039                    system=self.INTENT_CLASSIFIER_PROMPT,
1040                    max_tokens=10,
1041                    reasoning_effort="none",
1042                ),
1043                timeout=60.0,
1044            )
1045            token = (decision or "").strip().upper().split()[0] if decision else "OTHER"
1046            if token in ("HA", "PIPELINE", "OTHER", "ACTUATE"):
1047                return token
1048            return "OTHER"
1049        except asyncio.TimeoutError:
1050            logger.warning(f"[{self.name}] Intent classification timed out after 60s")
1051            return "OTHER"
1052        except Exception as e:
1053            logger.debug(f"[{self.name}] Intent classification failed: {e}")
1054            return "OTHER"
1055            
1056            
    async def _handle_actuate_intent(self, text: str) -> str:
        """Run a one-shot device-control request through a OneOffActuatorAgent.

        Steps:
          1. Bail out with a configuration hint when ``CONFIG.ha_url`` or
             ``CONFIG.ha_token`` is not set.
          2. Best-effort: ask the ``home-assistant-agent`` (if registered) for
             its entities, waiting at most 10 s, and append up to 300 of them to
             the request text. Any failure here is logged and the plain text is
             used instead.
          3. Spawn a ``OneOffActuatorAgent`` and wait up to 120 s for the RESULT
             message that ``handle_message`` delivers into the registered future.

        Args:
            text: The user's actuation request.

        Returns:
            The ``"result"`` field of the agent's reply (``"Done."`` when it is
            missing), a configuration hint, or a timeout message.
        """
        if not CONFIG.ha_url or not CONFIG.ha_token:
            return "Home Assistant is not configured. Set `HA_URL` and `HA_TOKEN` in your .env file."

        from .one_off_actuator_agent import OneOffActuatorAgent

        # ── Enrich the request with HA entity context ──────────────────────
        # The OneOffActuatorAgent needs to resolve "lamp" → "light.wiz_rgbw_tunable_02cba0".
        # Without entity context, it fails with "couldn't identify a matching device".
        # Fetch entities via the home-assistant-agent (cached + fast) and inject
        # the relevant matches into the request so the LLM can pick the right one.
        enriched_text = text
        try:
            if self._registry:
                ha_agent = self._registry.find_by_name("home-assistant-agent")
                if ha_agent:
                    # Use a unique task_id so the future resolves correctly
                    _ha_task_id = f"actuate_entities_{uuid.uuid4().hex[:8]}"
                    _ha_future: asyncio.Future = asyncio.get_running_loop().create_future()
                    self._result_futures[_ha_task_id] = _ha_future
                    # The id is sent under both "_task_id" and the legacy "task"
                    # key, the two names handle_message() looks up on RESULT.
                    await self.send(ha_agent.actor_id, MessageType.TASK, {
                        "text": "list_entities",
                        "_task_id": _ha_task_id,
                        "task": _ha_task_id,
                        "reply_to": self.actor_id,
                    })
                    try:
                        ha_result = await asyncio.wait_for(_ha_future, timeout=10.0)
                    except asyncio.TimeoutError:
                        ha_result = None
                    finally:
                        # Always drop the future so late replies are ignored.
                        self._result_futures.pop(_ha_task_id, None)

                    entities = []
                    if ha_result and isinstance(ha_result, dict):
                        entities = ha_result.get("entities", []) or ha_result.get("result", [])
                    if isinstance(entities, list) and entities:
                        # Build a compact entity summary for the LLM
                        entity_lines = []
                        # Only the first 300 entities are included.
                        for e in entities[:300]:
                            eid = e.get("entity_id", "")
                            name = e.get("name", "") or e.get("friendly_name", "")
                            if eid:
                                entry = eid
                                if name and name != eid:
                                    entry += f" ({name})"
                                entity_lines.append(entry)
                        if entity_lines:
                            enriched_text = (
                                f"{text}\n\n"
                                f"[AVAILABLE HA ENTITIES — match the user's device to one of these:\n"
                                + "\n".join(f"  {e}" for e in entity_lines)
                                + "\n]"
                            )
                            logger.info(
                                f"[{self.name}] Enriched actuate request with "
                                f"{len(entity_lines)} HA entities"
                            )
        except Exception as e:
            # Enrichment is optional: fall back to the un-enriched request text.
            logger.warning(f"[{self.name}] Could not fetch HA entities for actuate: {e}")

        task_id = f"actuate_{uuid.uuid4().hex[:8]}"
        future: asyncio.Future = asyncio.get_running_loop().create_future()
        # Registered before spawning so the reply cannot arrive unobserved.
        self._result_futures[task_id] = future

        try:
            await self.spawn(
                OneOffActuatorAgent,
                request=enriched_text,
                llm_provider=self.llm,
                task_id=task_id,
                reply_to_id=self.actor_id,
                persistence_dir=str(self._persistence_dir.parent),
            )
            result = await asyncio.wait_for(future, timeout=120.0)
            return result.get("result", "Done.")
        except asyncio.TimeoutError:
            return "Actuation timed out, please retry."
        finally:
            self._result_futures.pop(task_id, None)
1137
1138    async def _is_home_automation_request(self, text: str) -> bool:
1139        # Keep for backward compat — delegates to _classify_intent
1140        intent = await self._classify_intent(text)
1141        return intent == "HA"
1142
1143    # ── User input ─────────────────────────────────────────────────────────
1144
1145    async def chat(self, user_message: str) -> str:
1146        response = await super().chat(user_message)
1147        # Fire-and-forget fact extraction — don't block the response
1148        asyncio.create_task(self._extract_and_save_facts(user_message, response))
1149        return response
1150
1151    async def chat_stream(self, user_message: str):
1152        full_response = []
1153        async for chunk in super().chat_stream(user_message):
1154            if isinstance(chunk, dict):
1155                yield chunk
1156            else:
1157                full_response.append(chunk)
1158                yield chunk
1159        # Extract facts from completed response
1160        if full_response:
1161            asyncio.create_task(
1162                self._extract_and_save_facts(user_message, "".join(full_response))
1163            )
1164
1165
1166    def _drain_notifications(self) -> str:
1167        """Pop queued monitor notifications as a formatted prefix string."""
1168        if not self._pending_notifications:
1169            return ""
1170        icons = {"critical": "\U0001f534", "warning": "\U0001f7e1", "info": "\u2705"}
1171        lines = []
1172        for n in self._pending_notifications:
1173            icon = icons.get(n.get("severity", "warning"), "\u26a0\ufe0f")
1174            lines.append(f"{icon} **System:** {n.get('message', '').strip()}")
1175        self._pending_notifications.clear()
1176        return "\n".join(lines) + "\n\n---\n\n"
1177
1178    async def process_user_input(self, text: str) -> str:
1179        note_prefix = self._drain_notifications()
1180
1181        # ── Direct API intercepts — handle without LLM round-trip ──────────
1182        stripped = text.strip().rstrip("()")
1183
1184        # ── /help ───────────────────────────────────────────────────────────
1185        if stripped in ("/help", "help", "/?"):
1186            return note_prefix + "\n".join([
1187                "**Wactorz commands**",
1188                "",
1189                "**Agents**",
1190                "  /agents               — list all known agents with descriptions and schemas",
1191                "  /agents <keyword>     — filter agents by capability keyword",
1192                "  /capabilities         — alias for /agents",
1193                "  /agents stop <name>   — stop and remove an agent (local or remote)",
1194                "  /agents delete <name> — alias for /agents stop",
1195                "  @agent-name <msg>     — send a message directly to a named agent",
1196                "  @catalog list         — list available catalog recipes",
1197                "  @catalog spawn <n>    — spawn a catalog agent",
1198                "",
1199                "**Nodes**",
1200                "  /nodes                — list remote nodes and their agents",
1201                "  /nodes remove <node>  — stop all agents on a node and remove it",
1202                "",
1203                "**Pipelines**",
1204                "  /rules                — list active pipeline rules",
1205                "  /rules delete <id>    — stop agents and remove a rule",
1206                "",
1207                "**Memory**",
1208                "  /memory               — show stored user facts and conversation summary",
1209                "  /memory clear         — wipe all memory",
1210                "  /memory forget <key>  — remove one stored fact",
1211                "",
1212                "**Notifications**",
1213                "  /webhook discord <url>   — store a Discord webhook URL",
1214                "  /webhook telegram <url>  — store a Telegram webhook URL",
1215                "  /webhook                 — list stored webhook URLs",
1216                "",
1217                "**System**",
1218                "  /nodes                — list remote nodes and their agents",
1219                "  /topics               — list MQTT topics published by known agents",
1220                "  /topics <keyword>     — filter topics by keyword",
1221                "  /bus                  — TopicBus registry: contracts, data flows, wiring pairs",
1222                "  /mqtt                 — MQTT publisher status (connected, queue depth, outbox)",
1223                "  /help                 — show this help",
1224            ])
1225        if stripped in ("main.list_nodes", "list_nodes", "/nodes"):
1226            nodes = self.list_nodes()
1227            if not nodes:
1228                return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>."
1229            import time as _t
1230            lines = []
1231            for nd in sorted(nodes, key=lambda x: x["node"]):
1232                status   = "🟢 online" if nd["online"] else "🔴 offline"
1233                agents   = ", ".join(nd["agents"]) or "(no agents)"
1234                age      = int(_t.time() - nd["last_seen"])
1235                lines.append(f"  {nd['node']:22s} {status}  |  agents: {agents}  |  last heartbeat: {age}s ago")
1236            return note_prefix + "Remote nodes:\n" + "\n".join(lines) + "\nTo remove a node: /nodes remove <node-name>"
1237
1238        if stripped.startswith("/topics"):
1239            keyword = stripped[7:].strip().lstrip("(").rstrip(")")
1240            topics = self.list_topics(keyword)
1241            if not topics:
1242                msg = f"No topics found" + (f" matching '{keyword}'" if keyword else "") + "."
1243                msg += " Topics are registered automatically when agents publish for the first time."
1244                return note_prefix + msg
1245            lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"]
1246            for t in topics:
1247                agent_strs = ", ".join(
1248                    f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "")
1249                    for a in t["agents"]
1250                )
1251                lines.append(f"  {t['topic']:40s}{agent_strs}")
1252            return note_prefix + "\n".join(lines)
1253            
1254        if stripped == "/mqtt":
1255            client = self._mqtt_client
1256            if client is None:
1257                return note_prefix + "MQTT publisher not initialised."
1258            connected   = getattr(client, "connected",   False)
1259            queue_depth = getattr(client, "queue_depth", 0)
1260            client_id   = getattr(client, "_client_id",  "?")
1261            db_path     = getattr(client, "_db_path",    "?")
1262            status_icon = "🟢" if connected else "🔴"
1263            lines = [
1264                f"MQTT Publisher Status:",
1265                f"  {status_icon} connected   : {connected}",
1266                f"  client_id   : {client_id}",
1267                f"  queue_depth : {queue_depth} message(s) pending",
1268                f"  outbox_db   : {db_path}",
1269                f"  QoS 1 topics: nodes/*, agents/by-name/*",
1270                f"  QoS 0 topics: */logs, */metrics, */status, */heartbeat",
1271            ]
1272            if queue_depth > 0:
1273                lines.append(f"  ⚠️  {queue_depth} message(s) queued — will deliver when reconnected")
1274            return note_prefix + "\n".join(lines)
1275
1276        if stripped == "/bus":
1277            try:
1278                from ..core.topic_bus import get_topic_bus
1279                bus = get_topic_bus()
1280                if not bus:
1281                    return note_prefix + "TopicBus not initialised."
1282                summary = bus.registry.summary()
1283                lines = [
1284                    f"TopicBus — Reactive Pub/Sub Registry",
1285                    f"  agents with contracts : {summary['total_agents']}",
1286                    f"  published topics      : {summary['total_published']}",
1287                    f"  subscribed topics     : {summary['total_subscribed']}",
1288                    f"  auto-wiring pairs     : {summary['wiring_pairs']}",
1289                    "",
1290                ]
1291                for c in sorted(summary["agents"], key=lambda x: x["name"]):
1292                    lines.append(f"  [{c['name']}]" + (f" on {c['node']}" if c.get("node") else ""))
1293                    if c["publishes"]:
1294                        lines.append(f"    publishes : {', '.join(c['publishes'])}")
1295                    if c["subscribes"]:
1296                        lines.append(f"    subscribes: {', '.join(c['subscribes'])}")
1297                    if c.get("triggers_when"):
1298                        lines.append(f"    triggers  : {c['triggers_when']}")
1299                pairs = bus.registry.find_wiring_opportunities()
1300                if pairs:
1301                    lines.append("\nAuto-wiring opportunities:")
1302                    for prod, cons, topic in pairs:
1303                        lines.append(f"  {prod.name}{cons.name}  via {topic}")
1304                return note_prefix + "\n".join(lines)
1305            except Exception as e:
1306                return note_prefix + f"TopicBus error: {e}"
1307
1308
1309
1310        # ── Webhook / notification URL management ───────────────────────────
1311        if stripped.startswith("/memory"):
1312            parts = stripped.split(None, 1)
1313            sub = parts[1].strip() if len(parts) > 1 else ""
1314            if sub == "clear":
1315                self.persist("_user_facts", {})
1316                self.persist("history_summary", "")
1317                self._history_summary = ""
1318                self.system_prompt = ORCHESTRATOR_PROMPT
1319                return note_prefix + "Memory cleared — user facts and conversation summary reset."
1320            if sub.startswith("forget "):
1321                key = sub[7:].strip()
1322                facts = self.get_user_facts()
1323                if key in facts:
1324                    del facts[key]
1325                    self.persist("_user_facts", facts)
1326                    self._inject_user_facts_into_prompt()
1327                    return note_prefix + f"Forgotten: '{key}'"
1328                return note_prefix + f"No fact found with key '{key}'."
1329            # Default: show memory
1330            facts = self.get_user_facts()
1331            summary = self._history_summary
1332            lines = []
1333            if facts:
1334                lines.append(f"User facts ({len(facts)}):")
1335                for k, v in facts.items():
1336                    lines.append(f"  {k}: {v}")
1337            else:
1338                lines.append("No user facts stored yet.")
1339            if summary:
1340                lines.append(f"\nConversation summary:\n  {summary[:300]}{'...' if len(summary) > 300 else ''}")
1341            else:
1342                lines.append("\nNo conversation summary yet.")
1343            lines.append("\nCommands: /memory clear | /memory forget <key>")
1344            return note_prefix + "\n".join(lines)
1345
1346        if stripped.startswith("/webhook"):
1347            parts = stripped.split(None, 2)
1348            if len(parts) == 1:
1349                # /webhook — show stored URLs
1350                urls = self.recall("_notification_urls") or {}
1351                if not urls:
1352                    return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url>  or  /webhook telegram <url>"
1353                lines = ["Stored notification URLs:"]
1354                for svc, url in urls.items():
1355                    lines.append(f"  {svc}: {url}")
1356                return note_prefix + "\n".join(lines)
1357            elif len(parts) >= 3:
1358                # /webhook discord <url>
1359                service = parts[1].lower()
1360                url = parts[2].strip()
1361                urls = self.recall("_notification_urls") or {}
1362                urls[service] = url
1363                self.persist("_notification_urls", urls)
1364                return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically."
1365            else:
1366                return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..."
1367
1368        # Auto-detect webhook URLs in any message and persist them
1369        import re as _re
1370        _webhook_match = _re.search(
1371            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
1372            text
1373        )
1374        if _webhook_match:
1375            url = _webhook_match.group(0).rstrip(".,;!)'\"")
1376            urls = self.recall("_notification_urls") or {}
1377            if "discord" in url:
1378                urls["discord"] = url
1379            elif "slack" in url:
1380                urls["slack"] = url
1381            elif "telegram" in url:
1382                urls["telegram"] = url
1383            self.persist("_notification_urls", urls)
1384            logger.info(f"[{self.name}] Auto-saved webhook URL from message")
1385
1386        if stripped in ("/rules", "rules"):
1387            rules = self.get_pipeline_rules()
1388            if not rules:
1389                return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'."
1390            lines = [f"Active pipeline rules ({len(rules)}):"]
1391            for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)):
1392                agents = rule.get("agents", [])
1393                task = rule.get("task", "")[:80]
1394                import datetime
1395                ts = rule.get("created_at", 0)
1396                created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown"
1397                # Check which agents are running
1398                running_agents = []
1399                stopped_agents = []
1400                for a in agents:
1401                    if self._registry and self._registry.find_by_name(a):
1402                        running_agents.append(a)
1403                    else:
1404                        stopped_agents.append(a)
1405                status = "🟢" if running_agents else "🔴"
1406                lines.append(f"\n{status} [{rule_id}] — {task}")
1407                lines.append(f"   agents  : {', '.join(agents)}")
1408                if stopped_agents:
1409                    lines.append(f"   stopped : {', '.join(stopped_agents)}")
1410                lines.append(f"   created : {created}")
1411            lines.append("\nTo delete a rule: /rules delete <rule_id>")
1412            return note_prefix + "\n".join(lines)
1413
1414        if stripped.startswith("/rules delete "):
1415            rule_id = stripped[len("/rules delete "):].strip()
1416            result = await self.delete_pipeline_rule(rule_id)
1417            return note_prefix + result
1418
1419        # ── /agents stop|delete|pause <name> ───────────────────────────────
1420        for _cmd in ("/agents stop ", "/agents delete ", "/agents pause ", "/agents remove "):
1421            if stripped.startswith(_cmd):
1422                agent_name = stripped[len(_cmd):].strip()
1423                reg        = self._get_spawn_registry()
1424                node       = reg.get(agent_name, {}).get("node", "").strip()
1425
1426                # Remove from spawn registry so it doesn't restore on restart
1427                self._remove_from_spawn_registry(agent_name)
1428
1429                if node:
1430                    # Remote agent — publish stop + clear desired state
1431                    await self._update_node_desired_state(node, remove_name=agent_name)
1432                    await self._mqtt_publish(
1433                        f"nodes/{node}/stop", {"name": agent_name}, qos=1
1434                    )
1435                    return note_prefix + f"Stop signal sent to '{agent_name}' on node '{node}'."
1436                else:
1437                    # Local agent
1438                    if self._registry:
1439                        target = self._registry.find_by_name(agent_name)
1440                        if target:
1441                            await self._registry.unregister(target.actor_id)
1442                            await target.stop()
1443                            return note_prefix + f"Agent '{agent_name}' stopped."
1444                    return note_prefix + f"Agent '{agent_name}' not found locally."
1445
1446        # ── /nodes remove <node> ────────────────────────────────────────────
1447        if stripped.startswith("/nodes remove "):
1448            node_name = stripped[len("/nodes remove "):].strip()
1449            # Clear retained MQTT messages
1450            await self._mqtt_publish(f"nodes/{node_name}/spawn",         b"", retain=True)
1451            await self._mqtt_publish(f"nodes/{node_name}/desired_state", b"", retain=True)
1452            await self._mqtt_publish(f"nodes/{node_name}/stop_all",      {"reason": "removed"}, qos=1)
1453            # Remove all agents for this node from spawn registry
1454            reg     = self._get_spawn_registry()
1455            removed = [n for n, c in reg.items() if c.get("node", "") == node_name]
1456            for n in removed:
1457                self._remove_from_spawn_registry(n)
1458            self._known_nodes.pop(node_name, None)
1459            return note_prefix + (
1460                f"Node '{node_name}' removed. "
1461                f"Cleared {len(removed)} agent(s): {', '.join(removed) or 'none'}. "
1462                f"The node will disappear from /nodes within 30s."
1463            )
1464
1465        # ── /agents / /capabilities ─────────────────────────────────────────
1466        if stripped in ("/agents", "/capabilities") or \
1467                stripped.startswith("/agents ") or stripped.startswith("/capabilities "):
1468            keyword = ""
1469            for prefix in ("/capabilities ", "/agents "):
1470                if stripped.startswith(prefix):
1471                    keyword = stripped[len(prefix):].strip()
1472                    break
1473            caps = self.list_capabilities(keyword)
1474            if not caps:
1475                msg = "No agents found" + (f" matching {repr(keyword)}" if keyword else "") + "."
1476                msg += " Agents publish their capabilities on startup."
1477                return note_prefix + msg
1478            lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"]
1479            for a in caps:
1480                running  = "\U0001f7e2" if a["running"] else ("\U0001f4e6" if a["spawnable"] else "\U0001f534")
1481                node_str = f" on {a['node']}" if a.get("node") else ""
1482                lines.append("")
1483                lines.append(f"  {running} [{a['name']}]{node_str}")
1484                lines.append(f"    description : {a['description']}")
1485                if a["capabilities"]:
1486                    lines.append(f"    capabilities: {', '.join(a['capabilities'])}")
1487                if a["input_schema"]:
1488                    lines.append(f"    input       : {a['input_schema']}")
1489                if a["output_schema"]:
1490                    lines.append(f"    output      : {a['output_schema']}")
1491                if a["spawnable"]:
1492                    lines.append(f"    spawnable   : yes — @catalog spawn {a['name']}")
1493            lines.append("\nLegend: \U0001f7e2 running  \U0001f4e6 spawnable (not yet running)  \U0001f534 stopped")
1494            lines.append("Filter: /agents <keyword>   e.g. /agents discord")
1495            return note_prefix + "\n".join(lines)
1496
1497                # ── @mention direct routing ─────────────────────────────────────────
1498        if text.startswith("@"):
1499            # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?"
1500            parts       = text.split(None, 1)
1501            target_name = parts[0].lstrip("@").rstrip(":,")
1502            message     = parts[1].strip() if len(parts) > 1 else text
1503
1504            # Try local registry first
1505            local_target = self._registry.find_by_name(target_name) if self._registry else None
1506            if not local_target:
1507                # Not running — check if it's a spawnable catalog recipe
1508                manifest = self._agent_manifests.get(target_name, {})
1509                if manifest.get("spawnable") and manifest.get("catalog"):
1510                    catalog_name  = manifest["catalog"]
1511                    catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None
1512                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1513                        logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...")
1514                        try:
1515                            spawn_result = await catalog_actor._action_spawn(target_name, {})
1516                            if spawn_result and spawn_result.get("ok"):
1517                                await asyncio.sleep(0.5)
1518                                local_target = self._registry.find_by_name(target_name) if self._registry else None
1519                                logger.info(f"[main] '{target_name}' spawned, routing task...")
1520                            else:
1521                                err = spawn_result.get("message", "unknown error") if spawn_result else "no response"
1522                                return note_prefix + f"Could not spawn '{target_name}': {err}"
1523                        except Exception as e:
1524                            return note_prefix + f"Could not spawn '{target_name}': {e}"
1525
1526            if local_target:
1527                result = await self.delegate_task(target_name, message, timeout=60.0)
1528                if result:
1529                    reply = result.get("result") or result.get("response") or str(result)
1530                    return note_prefix + f"**{target_name}**: {reply}"
1531                return note_prefix + f"{target_name} did not respond."
1532
1533            # Check if it's a known remote agent
1534            remote_node = None
1535            for node_name, nd in self._known_nodes.items():
1536                if target_name in nd.get("agents", []):
1537                    remote_node = node_name
1538                    break
1539
1540            if remote_node:
1541                # Send via MQTT and wait for reply
1542                import time as _t
1543                reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}"
1544                future: asyncio.Future = asyncio.get_event_loop().create_future()
1545                self._result_futures[reply_topic] = future
1546
1547                await self._mqtt_publish(
1548                    f"agents/by-name/{target_name}/task",
1549                    {"text": message, "_reply_topic": reply_topic,
1550                     "_remote_task": True, "payload": message},
1551                )
1552
1553                # Subscribe briefly for the reply
1554                async def _wait_reply():
1555                    try:
1556                        import aiomqtt
1557                        async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
1558                            await client.subscribe(reply_topic)
1559                            async for msg in client.messages:
1560                                try:
1561                                    data = json.loads(msg.payload.decode())
1562                                    if not future.done():
1563                                        future.set_result(data)
1564                                except Exception:
1565                                    pass
1566                                return
1567                    except Exception as e:
1568                        if not future.done():
1569                            future.set_exception(e)
1570
1571                reply_task = asyncio.create_task(_wait_reply())
1572                try:
1573                    result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0)
1574                    reply_task.cancel()
1575                    reply = result.get("result") or result.get("response") or str(result)
1576                    return note_prefix + f"**{target_name}** (on {remote_node}): {reply}"
1577                except asyncio.TimeoutError:
1578                    reply_task.cancel()
1579                    return note_prefix + f"{target_name} on {remote_node} did not respond within 30s."
1580                finally:
1581                    self._result_futures.pop(reply_topic, None)
1582
1583            # Not found locally or remotely
1584            known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])]
1585            if known_remote:
1586                return note_prefix + (f"Agent '{target_name}' not found. "
1587                    f"Remote agents: {', '.join(known_remote)}")
1588            return note_prefix + f"Agent '{target_name}' not found."
1589
1590        # Explicit planner prefix always wins
1591        lowered = text.lower()
1592        if any(lowered.startswith(p) for p in (
1593            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1594            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1595        )):
1596            result = await self._run_planner(text)
1597            return note_prefix + (result or "Planner did not return a result. Please retry.")
1598
1599        # Single LLM call classifies intent: ACTUATE, HA, PIPELINE (reactive rule), OTHER
1600        intent = await self._classify_intent(text)
1601        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1602
1603        if intent == "PIPELINE":
1604            result = await self._run_planner(text)
1605            return note_prefix + (result or "Planner did not return a result. Please retry.")
1606            
1607        if intent == "ACTUATE":
1608            return note_prefix + await self._handle_actuate_intent(text)
1609
1610        if intent == "HA":
1611            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1612            if result and isinstance(result, dict) and result.get("result"):
1613                return note_prefix + str(result["result"])
1614            if not result:
1615                return note_prefix + "I could not reach the Home Assistant agent right now. Please retry."
1616            return note_prefix + "The Home Assistant agent did not return a result. Please retry."
1617
1618        response = await self.chat(text)
1619
1620        # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once
1621        has_spawn   = "<spawn>" in response
1622        has_code    = "async def handle_task" in response or "async def setup" in response
1623        asked_spawn = any(w in text.lower() for w in ("spawn", "create", "make", "build", "add", "agent"))
1624        if has_code and not has_spawn and asked_spawn:
1625            logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it")
1626            response = await self.chat(
1627                "You wrote agent code but forgot to wrap it in a <spawn> block. "
1628                "Please output the complete spawn block now with that exact code inside it. "
1629                "Output ONLY the <spawn>...</spawn> block, nothing else."
1630            )
1631
1632        clean, spawned = await self._process_spawn_commands(response)
1633
1634        # Execute any @agent-name {payload} delegation patterns the LLM produced
1635        clean = await self._execute_llm_delegations(clean)
1636
1637        await self._mqtt_publish(
1638            f"agents/{self.actor_id}/logs",
1639            {"type": "user_interaction", "input": text[:100], "response": clean[:200]},
1640        )
1641
1642        if spawned:
1643            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1644            live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)]
1645            parts = []
1646            if live_names:
1647                replaced = '"replace": true' in response or '"replace":true' in response
1648                action   = "Replaced" if replaced else "Spawned"
1649                parts.append(f"{action} {', '.join(live_names)}")
1650            if bg_names:
1651                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1652            if parts:
1653                clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]"
1654
1655        return note_prefix + clean
1656
1657    async def process_user_input_stream(self, text: str):
1658        """
1659        Streaming version of process_user_input().
1660        Yields text chunks as the LLM generates them, then a final dict:
1661          {"done": True, "spawned": [...names...], "system_msg": "..."}
1662
1663        The CLI calls this and prints chunks immediately.
1664        REST/Discord/WhatsApp should use process_user_input() instead.
1665        """
1666        # Drain monitor notifications first
1667        note_prefix = self._drain_notifications()
1668        if note_prefix:
1669            yield note_prefix
1670
1671        # All slash-commands and direct API intercepts are handled by process_user_input
1672        # Route them there to avoid duplicating all that logic here
1673        _stripped = text.strip().rstrip("()")
1674        _is_command = (
1675            _stripped.startswith("/")
1676            or _stripped in ("list_nodes", "main.list_nodes", "rules")
1677            or _stripped.startswith("@")
1678        )
1679        if _is_command:
1680            result = await self.process_user_input(text)
1681            yield result
1682            yield {"done": True, "spawned": [], "system_msg": ""}
1683            return
1684
1685        # Explicit planner prefix always wins
1686        _lowered = text.lower()
1687        if any(_lowered.startswith(p) for p in (
1688            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1689            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1690        )):
1691            result = await self._run_planner(text)
1692            yield result or "Planner did not return a result. Please retry."
1693            yield {"done": True, "spawned": [], "system_msg": ""}
1694            return
1695
1696        # Single LLM call classifies intent: ACTUATE, HA, PIPELINE, or OTHER
1697        intent = await self._classify_intent(text)
1698        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1699
1700        if intent == "PIPELINE":
1701            result = await self._run_planner(text)
1702            yield result or "Planner did not return a result. Please retry."
1703            yield {"done": True, "spawned": [], "system_msg": ""}
1704            return
1705            
1706        if intent == "ACTUATE":
1707            result = await self._handle_actuate_intent(text)
1708            yield result
1709            yield {"done": True, "spawned": [], "system_msg": ""}
1710            return
1711
1712        if intent == "HA":
1713            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1714            if result and isinstance(result, dict) and result.get("result"):
1715                yield str(result["result"])
1716            elif not result:
1717                yield "I could not reach the Home Assistant agent right now. Please retry."
1718            else:
1719                yield "The Home Assistant agent did not return a result. Please retry."
1720            yield {"done": True, "spawned": [], "system_msg": ""}
1721            return
1722
1723        # Stream the LLM response chunk by chunk
1724        full_chunks = []
1725        async for chunk in self.chat_stream(text):
1726            if isinstance(chunk, dict):
1727                break   # usage dict — discard, already tracked inside chat_stream
1728            full_chunks.append(chunk)
1729            yield chunk
1730
1731        full_response = "".join(full_chunks)
1732
1733        # Process any <spawn> blocks in the completed response
1734        _, spawned = await self._process_spawn_commands(full_response)
1735
1736        # Execute any @agent-name {payload} delegation patterns the LLM produced
1737        # If delegations ran, yield the results as an additional chunk
1738        delegated = await self._execute_llm_delegations(full_response)
1739        if delegated != full_response:
1740            # Find what changed and yield just the new parts
1741            import re as _re
1742            results = _re.findall(r'[✅❌]\s+\S+.*', delegated)
1743            if results:
1744                yield "\n" + "\n".join(results)
1745        full_response = delegated
1746
1747        system_msg = ""
1748        if spawned:
1749            names      = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder))
1750            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1751            parts = []
1752            if names:
1753                replaced = '"replace": true' in full_response or '"replace":true' in full_response
1754                parts.append(f"{'Replaced' if replaced else 'Spawned'} {names} — will auto-restore on restart")
1755            if bg_names:
1756                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1757            system_msg = " | ".join(parts)
1758
1759        await self._mqtt_publish(
1760            f"agents/{self.actor_id}/logs",
1761            {"type": "user_interaction", "input": text[:100], "response": full_response[:200]},
1762        )
1763
1764        yield {"done": True, "spawned": spawned, "system_msg": system_msg}
1765
1766    # ── Planner ────────────────────────────────────────────────────────────
1767
    # Cues that a request may need multi-agent coordination.
    # _needs_planning() applies every entry with re.search() to the
    # lower-cased user text, so each entry is a regular expression: plain
    # phrases match as substrings (including inside longer words), and
    # "a.*b" entries match cue "a" followed anywhere later by cue "b".
    _PLANNING_KEYWORDS = [
        # Coordination signals
        "and then", "after that", "also", "combine", "compare",
        "coordinate", "plan", "pipeline", "orchestrate", "summarize both",
        "using multiple", "all agents", "several agents",
        # Multi-step / multi-domain signals
        "first.*then", "step by step", "in order",
        "weather.*news", "news.*weather", "manual.*code", "search.*analyze",
        # Reactive pipeline signals
        "if.*then", "when.*send", "when.*turn", "when.*open", "when.*close",
        "whenever", "monitor.*and", "watch.*and", "detect.*and",
        "notify me", "alert me", "automatically",
    ]
1781
1782    async def _needs_planning(self, text: str) -> bool:
1783        """
1784        Heuristic: does this task benefit from multi-agent coordination?
1785        Keeps main fast — only escalates genuinely complex requests.
1786        """
1787        import re
1788        lowered = text.lower()
1789
1790        # Explicit user request for coordination
1791        if any(w in lowered for w in (
1792            "coordinate:", "plan:", "pipeline:", "@planner",
1793            "ask the planner", "use the planner", "create a pipeline",
1794            "set up a pipeline", "create a rule", "set up a rule",
1795        )):
1796            return True
1797
1798        # Keyword heuristic — multiple signals needed to avoid false positives
1799        hits = sum(1 for kw in self._PLANNING_KEYWORDS if re.search(kw, lowered))
1800        if hits >= 2:
1801            return True
1802
1803        # References two or more known agent names
1804        if self._registry:
1805            agent_names = [a.name for a in self._registry.all_actors()
1806                           if a.name not in {"main", "monitor", "installer"}]
1807            mentioned = sum(1 for name in agent_names if name in lowered)
1808            if mentioned >= 2:
1809                return True
1810
1811        return False
1812
1813    async def _run_planner(self, task: str) -> Optional[str]:
1814        """Spawn a PlannerAgent, hand it the task, wait for the result."""
1815        from .planner_agent import PlannerAgent
1816        import uuid
1817
1818        # Enrich vague follow-up tasks with recent conversation context
1819        # so the planner has the full picture (e.g. which entity was found)
1820        enriched_task = task
1821        if self._conversation_history and len(task.split()) < 15:
1822            # Short/vague task — inject last 3 exchanges as context
1823            recent = self._conversation_history[-6:]  # 3 user+assistant pairs
1824            ctx_lines = []
1825            for m in recent:
1826                role    = "User" if m["role"] == "user" else "Assistant"
1827                content = str(m["content"])[:300]
1828                ctx_lines.append(f"{role}: {content}")
1829            if ctx_lines:
1830                enriched_task = (
1831                    f"{task}\n\n"
1832                    f"[Context from recent conversation:]\n"
1833                    + "\n".join(ctx_lines)
1834                )
1835
1836        planner_name = f"planner-{uuid.uuid4().hex[:6]}"
1837        logger.info(f"[{self.name}] Spawning planner '{planner_name}' for: {enriched_task[:60]}")
1838
1839        await self._mqtt_publish(
1840            f"agents/{self.actor_id}/logs",
1841            {"type": "log", "message": f"Complex task detected — spawning planner...", "timestamp": __import__('time').time()},
1842        )
1843
1844        task_id = f"plan_{uuid.uuid4().hex[:8]}"
1845        future: asyncio.Future = asyncio.get_running_loop().create_future()
1846        self._result_futures[task_id] = future
1847
1848        try:
1849            planner = await self.spawn(
1850                PlannerAgent,
1851                name=planner_name,
1852                llm_provider=self.llm,
1853                task=enriched_task,
1854                reply_to_id=self.actor_id,
1855                reply_task_id=task_id,
1856                auto_terminate=True,
1857                persistence_dir=str(self._persistence_dir.parent),
1858            )
1859            if not planner:
1860                return None
1861
1862            result_payload = await asyncio.wait_for(future, timeout=180.0)
1863            answer = result_payload.get("result") or result_payload.get("text") or ""
1864            spawned_names = result_payload.get("spawned", [])
1865            if spawned_names:
1866                answer += f"\n\n[System: Planner created new agents: {', '.join(spawned_names)} — saved for future use]"
1867            return answer
1868
1869        except asyncio.TimeoutError:
1870            logger.warning(f"[{self.name}] Planner timed out for: {task[:60]}")
1871            return "The pipeline is taking longer than expected to set up. Check `/rules` in a moment to see if agents were spawned, or try again."
1872        except Exception as e:
1873            logger.error(f"[{self.name}] Planner error: {e}")
1874            return None
1875        finally:
1876            self._result_futures.pop(task_id, None)
1877
1878        # ── Spawn ──────────────────────────────────────────────────────────────
1879
1880    async def _execute_llm_delegations(self, response: str) -> str:
1881        """
1882        Scan the LLM response for @agent-name {json} delegation patterns and execute them.
1883        Replaces the pattern in the response with the actual result.
1884
1885        Matches lines like:
1886            @doc-to-pptx-agent {"file_path": "...", "output_path": "..."}
1887            @weather-agent {"city": "Athens"}
1888        """
1889        import re
1890
1891        # Find @agent-name then scan for the matching { } block manually
1892        # (regex alone can't handle } inside string values reliably)
1893        delegations = []   # list of (full_match_str, agent_name, payload_dict)
1894
1895        for m in re.finditer(r'@([\w][\w\-]*)\s+(\{)', response):
1896            agent_name = m.group(1)
1897            if agent_name == self.name:
1898                continue
1899            start = m.start(2)   # position of opening {
1900            depth = 0
1901            end   = start
1902            for i, ch in enumerate(response[start:], start):
1903                if ch == '{':
1904                    depth += 1
1905                elif ch == '}':
1906                    depth -= 1
1907                    if depth == 0:
1908                        end = i + 1
1909                        break
1910            if depth != 0:
1911                continue   # unmatched braces — skip
1912            json_str = response[start:end]
1913            try:
1914                payload = json.loads(json_str)
1915            except json.JSONDecodeError:
1916                continue
1917            delegations.append((response[m.start():end], agent_name, payload))
1918
1919        replacements = []
1920        for full_match, agent_name, payload in delegations:
1921            # Check if agent is running, if not auto-spawn via catalog
1922            target = self._registry.find_by_name(agent_name) if self._registry else None
1923            if not target:
1924                manifest = self._agent_manifests.get(agent_name, {})
1925                if manifest.get("spawnable") and manifest.get("catalog"):
1926                    catalog_actor = self._registry.find_by_name(manifest["catalog"]) if self._registry else None
1927                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1928                        logger.info(f"[{self.name}] Auto-spawning '{agent_name}' via catalog...")
1929                        try:
1930                            spawn_result = await catalog_actor._action_spawn(agent_name, {})
1931                            if spawn_result and spawn_result.get("ok"):
1932                                await asyncio.sleep(0.5)
1933                                target = self._registry.find_by_name(agent_name) if self._registry else None
1934                                logger.info(f"[{self.name}] '{agent_name}' spawned successfully")
1935                            else:
1936                                err = spawn_result.get("message", "unknown") if spawn_result else "no response"
1937                                logger.warning(f"[{self.name}] Spawn failed for '{agent_name}': {err}")
1938                        except Exception as e:
1939                            logger.error(f"[{self.name}] Spawn error for '{agent_name}': {e}")
1940
1941            if not target:
1942                replacements.append((full_match, f"[Could not reach {agent_name}]"))
1943                continue
1944
1945            json_str = json.dumps(payload)
1946            logger.info(f"[{self.name}] Executing LLM delegation → @{agent_name} {json_str[:80]}")
1947            try:
1948                result = await self.delegate_task(agent_name, json_str, timeout=300.0)
1949                if result:
1950                    if isinstance(result, dict):
1951                        error = result.get("error")
1952                        if error:
1953                            result_str = f"❌ {agent_name} failed: {error}"
1954                        else:
1955                            for key in ("pptx_path", "image_path", "result", "message", "output", "text"):
1956                                if result.get(key):
1957                                    result_str = f"✅ {agent_name} completed: {key}={result[key]}"
1958                                    break
1959                            else:
1960                                result_str = f"✅ {agent_name} completed: {result}"
1961                    else:
1962                        result_str = f"✅ {agent_name}: {result}"
1963                else:
1964                    result_str = f"[{agent_name} did not respond]"
1965            except Exception as e:
1966                result_str = f"[{agent_name} error: {e}]"
1967
1968            replacements.append((full_match, result_str))
1969
1970        # Apply replacements
1971        for original, replacement in replacements:
1972            response = response.replace(original, replacement)
1973
1974        return response
1975
1976    @staticmethod
1977    def _parse_spawn_config(raw: str) -> dict:
1978        """
1979        Robustly parse a spawn config that may contain raw multiline code strings.
1980        Uses character scanning to correctly handle } and " inside the code value.
1981        """
1982        raw = raw.strip()
1983
1984        # Strategy 1: standard JSON (works when LLM properly escapes newlines)
1985        try:
1986            return json.loads(raw)
1987        except json.JSONDecodeError:
1988            pass
1989
1990        # Strategy 2: backtick-delimited code (rare but some LLMs use it)
1991        bt_match = re.search(r'"code"\s*:\s*`(.*?)`', raw, re.DOTALL)
1992        if bt_match:
1993            code_raw    = bt_match.group(1)
1994            placeholder = re.sub(r'"code"\s*:\s*`.*?`', '"code": "__CODE__"', raw, flags=re.DOTALL)
1995            config      = json.loads(placeholder)
1996            config["code"] = code_raw
1997            return config
1998
1999        # Strategy 3: character scanner — find opening " after "code":
2000        # then scan forward respecting escape sequences to find the real closing "
2001        # This correctly handles } and { inside the code value.
2002        key_match = re.search(r'"code"\s*:\s*"', raw)
2003        if not key_match:
2004            raise ValueError(f"No 'code' key found in spawn config:\n{raw[:200]}")
2005
2006        code_start = key_match.end()   # index right after the opening "
2007        i = code_start
2008        while i < len(raw):
2009            if raw[i] == '\\':
2010                i += 2             # skip escaped character
2011                continue
2012            if raw[i] == '"':
2013                break              # found unescaped closing quote
2014            i += 1
2015
2016        code_raw    = raw[code_start:i]
2017        placeholder = raw[:key_match.start()] + '"code": "__CODE__"' + raw[i+1:]
2018
2019        try:
2020            config = json.loads(placeholder)
2021        except json.JSONDecodeError as e:
2022            raise ValueError(f"Spawn config JSON invalid after code extraction: {e}\nPlaceholder:\n{placeholder[:300]}")
2023
2024        # Unescape sequences the LLM may have added
2025        config["code"] = (code_raw
2026                          .replace("\\n", "\n")
2027                          .replace('\\"', '"')
2028                          .replace("\\t", "\t"))
2029        return config
2030
2031    async def _process_spawn_commands(self, response: str):
2032        spawned = []
2033        pattern = r'<spawn>(.*?)</spawn>'
2034
2035        for match in re.findall(pattern, response, re.DOTALL):
2036            try:
2037                config = self._parse_spawn_config(match.strip())
2038                # LLM agents have no "code" — only check for code if type is dynamic
2039                agent_type = config.get("type", "dynamic")
2040                has_code   = bool(config.get("code", "").strip())
2041                has_prompt = bool(config.get("system_prompt", "").strip())
2042                if agent_type == "dynamic" and not has_code:
2043                    logger.error(f"[{self.name}] Dynamic agent has no code: {config.get('name')}")
2044                    continue
2045                if agent_type == "llm" and not has_prompt:
2046                    logger.warning(f"[{self.name}] LLM agent has no system_prompt, using default: {config.get('name')}")
2047                actor = await self._spawn_from_config(config, save=True)
2048                if actor:
2049                    spawned.append(actor)
2050            except Exception as e:
2051                logger.error(f"[{self.name}] Spawn failed: {e}\nRaw block:\n{match[:500]}")
2052
2053        clean = re.sub(pattern, '', response, flags=re.DOTALL).strip()
2054        return clean, spawned
2055
2056    async def _spawn_from_config(self, config: dict, save: bool = True) -> Optional[Actor]:
2057        name = config.get("name", "dynamic-agent")
2058        node = config.get("node", "").strip()
2059
2060        # Remote spawn — publish to the node's spawn topic via MQTT
2061        if node:
2062            return await self._spawn_remote(config, node, save)
2063
2064        # Local spawn
2065        from .dynamic_agent import DynamicAgent
2066
2067        existing = self._registry.find_by_name(name) if self._registry else None
2068        replace  = config.get("replace", False)
2069
2070        if existing:
2071            if not replace:
2072                logger.info(f"[{self.name}] '{name}' already exists (use replace=true to update).")
2073                return existing
2074            # Stop the old agent cleanly before spawning the replacement
2075            logger.info(f"[{self.name}] Replacing '{name}' with updated code...")
2076            try:
2077                if self._registry:
2078                    await self._registry.unregister(existing.actor_id)
2079                await existing.stop()
2080                await asyncio.sleep(0.5)
2081            except Exception as e:
2082                logger.warning(f"[{self.name}] Error stopping old '{name}': {e}")
2083
2084        agent_type    = config.get("type", "dynamic")
2085        code          = config.get("code", "").strip()
2086        system_prompt = config.get("system_prompt", "").strip()
2087
2088        # Route to the right agent class
2089        if agent_type == "ha_actuator":
2090            actor = await self._spawn_ha_actuator(config, name)
2091        elif agent_type == "manual" or name == "manual-agent":
2092            actor = await self._spawn_manual_agent(config, name)
2093        elif agent_type == "llm" or (not code and system_prompt):
2094            actor = await self._spawn_llm_agent(config, name)
2095        elif code:
2096            actor = await self._spawn_dynamic_agent(config, name, code)
2097        else:
2098            logger.warning(f"[{self.name}] Spawn config for '{name}' has neither code nor system_prompt.")
2099            return None
2100
2101        if actor and save:
2102            self._save_to_spawn_registry(config)
2103
2104        return actor
2105
2106    async def _spawn_ha_actuator(self, config: dict, name: str):
2107        """Spawn a HomeAssistantActuatorAgent from a spawn block with type: ha_actuator."""
2108        from .home_assistant_actuator_agent import (
2109            HomeAssistantActuatorAgent, ActuatorConfig, ActuatorAction, ActuatorCondition,
2110        )
2111        import hashlib as _hl
2112
2113        # Ensure unique name if collision
2114        if self._registry and self._registry.find_by_name(name):
2115            suffix = _hl.md5(f"{name}{__import__('time').time()}".encode()).hexdigest()[:4]
2116            name   = f"{name}-{suffix}"
2117
2118        automation_id = config.get("automation_id", name)
2119        actuator_cfg  = ActuatorConfig(
2120            automation_id    = automation_id,
2121            description      = config.get("description", ""),
2122            mqtt_topics      = config.get("mqtt_topics", []),
2123            actions          = [ActuatorAction.from_dict(a) for a in config.get("actions", [])],
2124            conditions       = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])],
2125            detection_filter = config.get("detection_filter"),
2126            cooldown_seconds = float(config.get("cooldown_seconds", 10.0)),
2127        )
2128        logger.info(f"[{self.name}] Spawning HomeAssistantActuatorAgent '{name}'")
2129        actor = await self.spawn(
2130            HomeAssistantActuatorAgent,
2131            config          = actuator_cfg,
2132            name            = name,
2133            persistence_dir = str(self._persistence_dir.parent),
2134        )
2135        return actor
2136
2137    async def _spawn_manual_agent(self, config: dict, name: str):
2138        """Spawn the pre-defined ManualAgent — robust PDF manual search and Q&A."""
2139        from .manual_agent import ManualAgent
2140        logger.info(f"[{self.name}] Spawning ManualAgent '{name}'")
2141        actor = await self.spawn(
2142            ManualAgent,
2143            name=name,
2144            llm_provider=self.llm,
2145            persistence_dir=str(self._persistence_dir.parent),
2146        )
2147        return actor
2148
2149    async def _spawn_llm_agent(self, config: dict, name: str):
2150        """Spawn a proper LLMAgent — best for chat, Q&A, reasoning tasks."""
2151        from .llm_agent import LLMAgent
2152        system_prompt = config.get("system_prompt", "You are a helpful assistant.")
2153        logger.info(f"[{self.name}] Spawning LLM agent '{name}'")
2154        actor = await self.spawn(
2155            LLMAgent,
2156            name=name,
2157            llm_provider=self.llm,
2158            system_prompt=system_prompt,
2159            persistence_dir=str(self._persistence_dir.parent),
2160        )
2161        return actor
2162
2163    async def _spawn_dynamic_agent(self, config: dict, name: str, code: str):
2164        """Spawn a DynamicAgent — best for data pipelines, sensors, tools."""
2165        packages = config.get("install", [])
2166        if isinstance(packages, str):
2167            packages = [p.strip() for p in packages.replace(",", " ").split()]
2168
2169        if packages:
2170            # Fast-path: check which packages actually need installing.
2171            # On restore (after restart), packages from the previous session
2172            # are already installed — no need to wait for the installer agent
2173            # which might not have started yet.
2174            import importlib
2175            needed = []
2176            for pkg in packages:
2177                import_name = pkg.replace("-", "_").split("[")[0]
2178                try:
2179                    importlib.import_module(import_name)
2180                except ImportError:
2181                    needed.append(pkg)
2182
2183            if needed:
2184                # Some packages missing — install in background
2185                logger.info(f"[{self.name}] Scheduling background install+spawn for '{name}': {needed}")
2186                asyncio.create_task(self._install_then_spawn(config, name, code, needed))
2187                return _SpawnPlaceholder(name)
2188            else:
2189                # All packages already available — spawn immediately
2190                logger.info(f"[{self.name}] All deps for '{name}' already installed — spawning directly")
2191                return await self._do_spawn_dynamic(config, name, code)
2192        else:
2193            return await self._do_spawn_dynamic(config, name, code)
2194
2195    async def _install_then_spawn(self, config: dict, name: str, code: str, packages: list):
2196        """Background task: install packages then spawn the agent."""
2197        try:
2198            await self._mqtt_publish(
2199                f"agents/{self.actor_id}/logs",
2200                {"type": "log", "message": f"Installing {packages} for {name}...", "timestamp": __import__("time").time()},
2201            )
2202            await self._install_packages(packages)
2203            actor = await self._do_spawn_dynamic(config, name, code)
2204            if actor:
2205                self._save_to_spawn_registry(config)
2206                await self._mqtt_publish(
2207                    f"agents/{self.actor_id}/logs",
2208                    {"type": "spawned", "message": f"'{name}' spawned after install", "child_name": name, "timestamp": __import__("time").time()},
2209                )
2210                logger.info(f"[{self.name}] Background spawn complete: {name}")
2211        except Exception as e:
2212            logger.error(f"[{self.name}] Background install+spawn failed for '{name}': {e}")
2213
2214    async def _do_spawn_dynamic(self, config: dict, name: str, code: str):
2215        """Actually create and start the DynamicAgent."""
2216        from .dynamic_agent import DynamicAgent
2217        actor = await self.spawn(
2218            DynamicAgent,
2219            name=name,
2220            code=code,
2221            poll_interval=float(config.get("poll_interval", 1.0)),
2222            description=config.get("description", ""),
2223            input_schema=config.get("input_schema", {}),
2224            output_schema=config.get("output_schema", {}),
2225            llm_provider=self.llm,
2226            persistence_dir=str(self._persistence_dir.parent),
2227            trusted=bool(config.get("trusted", False)),
2228        )
2229        
2230        # Register TopicContract if spawn config declares pub/sub topics
2231        if actor and (config.get("publishes") or config.get("subscribes")):
2232            try:
2233                from ..core.topic_bus import TopicContract, get_topic_bus
2234                contract = TopicContract.from_spawn_config({**config, "actor_id": actor.actor_id})
2235                bus = get_topic_bus()
2236                if bus:
2237                    bus.register_contract(contract)
2238                    logger.info(f"[{self.name}] Registered TopicContract for '{name}': "
2239                                f"pub={contract.publishes} sub={contract.subscribes}")
2240            except Exception as e:
2241                logger.debug(f"[{self.name}] TopicContract registration skipped: {e}")
2242                
2243        return actor
2244
2245    async def _install_packages(self, packages: list[str]):
2246        """Delegate package installation to the installer agent."""
2247        if not self._registry:
2248            return
2249
2250        # Fast path: check which packages actually need installing
2251        import importlib, sys
2252        needed = []
2253        for pkg in packages:
2254            import_name = pkg.replace("-", "_").split("[")[0]
2255            try:
2256                importlib.import_module(import_name)
2257            except ImportError:
2258                needed.append(pkg)
2259        if not needed:
2260            logger.info(f"[{self.name}] All packages already available: {packages} — skipping install")
2261            return
2262
2263        installer = self._registry.find_by_name("installer")
2264        if not installer:
2265            logger.warning(f"[{self.name}] installer agent not found — skipping install of {needed}")
2266            return
2267        logger.info(f"[{self.name}] Installing packages via installer: {needed}")
2268        import uuid
2269        task_id = f"install_{uuid.uuid4().hex[:8]}"
2270        future = asyncio.get_event_loop().create_future()
2271        self._result_futures[task_id] = future
2272        await self.send(installer.actor_id, MessageType.TASK, {
2273            "action": "install",
2274            "packages": needed,
2275            "task": task_id,
2276            "_task_id": task_id,
2277            "reply_to": self.actor_id,
2278        })
2279        try:
2280            result = await asyncio.wait_for(future, timeout=120.0)
2281            logger.info(f"[{self.name}] Install result: {result.get('message', result)}")
2282            if result.get("failed"):
2283                logger.warning(f"[{self.name}] Failed to install: {result['failed']}")
2284        except asyncio.TimeoutError:
2285            logger.warning(f"[{self.name}] Package install timed out for {needed}")
2286        finally:
2287            self._result_futures.pop(task_id, None)
2288
2289    async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
2290        """
2291        Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline.
2292        Returns the final synthesised result without blocking main's context.
2293
2294        Usage:
2295            result = await main.run_pipeline(
2296                goal="Find the Philips EP2220 manual and answer: how do I descale it?",
2297                agents=["manual-agent", "installer"]
2298            )
2299        """
2300        from .task_manager import TaskManager
2301        import uuid
2302
2303        task_id = uuid.uuid4().hex[:8]
2304        future  = asyncio.get_event_loop().create_future()
2305        self._result_futures[task_id] = future
2306
2307        mgr = await self.spawn(
2308            TaskManager,
2309            goal=goal,
2310            available_agents=agents,
2311            llm_provider=self.llm,
2312            reply_to_id=self.actor_id,
2313            reply_task_id=task_id,
2314            auto_destroy=True,
2315            force_replan=force_replan,
2316            cache_dir=str(self._persistence_dir.parent / "plan_cache"),
2317            persistence_dir=str(self._persistence_dir.parent),
2318        )
2319
2320        logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}")
2321
2322        try:
2323            result = await asyncio.wait_for(future, timeout=timeout)
2324            return result
2325        except asyncio.TimeoutError:
2326            logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s")
2327            return {"error": f"Pipeline timed out after {timeout}s"}
2328        finally:
2329            self._result_futures.pop(task_id, None)
2330
2331    async def _spawn_remote(self, config: dict, node: str, save: bool) -> None:
2332        """
2333        Publish a spawn command to a remote node via MQTT.
2334        The remote_runner.py on that machine will receive it and run the agent.
2335        Remote agents appear in the dashboard exactly like local ones
2336        because they connect to the same MQTT broker.
2337
2338        Also updates nodes/{node}/desired_state (retained) with ALL agents for
2339        this node so the runner can self-heal after a reboot.
2340
2341        If the spawn config has an 'install' list, packages are installed on the
2342        remote node via SSH BEFORE the agent is spawned — so setup() won't fail
2343        with 'No module named X'.
2344        """
2345        name     = config.get("name", "remote-agent")
2346        packages = config.get("install", [])
2347        if isinstance(packages, str):
2348            packages = [p.strip() for p in packages.replace(",", " ").split()]
2349
2350        logger.info(f"[{self.name}] Spawning '{name}' on remote node '{node}'")
2351
2352        # ── Install packages on remote node first ─────────────────────────────
2353        if packages:
2354            # Look up SSH credentials from known_nodes or ask installer
2355            node_info  = self._known_nodes.get(node, {})
2356            host       = node_info.get("host")
2357            # Try to get host from spawn registry (node_deploy stored it)
2358            # Try to get host from known_nodes, spawn registry, or installer's persisted credentials
2359            if not host:
2360                reg = self._get_spawn_registry()
2361                for cfg in reg.values():
2362                    if cfg.get("node") == node and cfg.get("host"):
2363                        host = cfg["host"]
2364                        break
2365            if not host and self._registry:
2366                installer = self._registry.find_by_name("installer")
2367                if installer:
2368                    host = installer.recall(f"node_host_{node}")
2369                    if not node_info.get("user"):
2370                        node_info["user"] = installer.recall(f"node_user_{node}") or "pi"
2371
2372            if host and self._registry:
2373                installer = self._registry.find_by_name("installer")
2374                if installer:
2375                    # Load full persisted credentials for this node
2376                    node_creds = (installer.recall("_node_credentials") or {}).get(node, {})
2377                    ssh_user     = node_creds.get("user") or node_info.get("user", "pi")
2378                    ssh_password = node_creds.get("password") or ""
2379                    ssh_key_path = node_creds.get("key_path") or ""
2380
2381                    logger.info(f"[{self.name}] Installing {packages} on {node} ({host}) before spawn...")
2382                    import uuid as _uuid
2383                    task_id = f"remote_install_{_uuid.uuid4().hex[:8]}"
2384                    future  = asyncio.get_running_loop().create_future()
2385                    self._result_futures[task_id] = future
2386                    install_payload = {
2387                        "action":    "node_install",
2388                        "host":      host,
2389                        "user":      ssh_user,
2390                        "packages":  packages,
2391                        "node_name": node,
2392                        "_task_id":  task_id,
2393                        "task":      task_id,
2394                    }
2395                    if ssh_password:
2396                        install_payload["password"] = ssh_password
2397                    if ssh_key_path:
2398                        install_payload["key_path"] = ssh_key_path
2399                    await self.send(installer.actor_id, MessageType.TASK, install_payload)
2400                    try:
2401                        result = await asyncio.wait_for(future, timeout=180.0)
2402                        if result.get("success"):
2403                            logger.info(f"[{self.name}] Remote install OK: {packages}")
2404                        else:
2405                            logger.warning(f"[{self.name}] Remote install issue: {result.get('error', '?')}")
2406                    except asyncio.TimeoutError:
2407                        logger.warning(f"[{self.name}] Remote install timed out — spawning anyway")
2408                    finally:
2409                        self._result_futures.pop(task_id, None)
2410                else:
2411                    logger.warning(f"[{self.name}] installer not found — skipping remote package install for '{name}'")
2412            else:
2413                logger.warning(
2414                    f"[{self.name}] No host known for node '{node}' — cannot pre-install {packages}. "
2415                    f"Install manually: ssh into {node} and run: pip install {' '.join(packages)} --break-system-packages"
2416                )
2417
2418        # Publish individual spawn (for immediate delivery)
2419        await self._mqtt_publish(
2420            f"nodes/{node}/spawn",
2421            config,
2422            retain=True,
2423            qos=1,
2424        )
2425
2426        # Update desired state for the whole node (retained — survives Pi reboot)
2427        await self._update_node_desired_state(node, config)
2428
2429        await self._mqtt_publish(
2430            f"agents/{self.actor_id}/logs",
2431            {"type": "spawned", "message": f"Spawned '{name}' on node '{node}'",
2432             "child_name": name, "node": node, "timestamp": __import__("time").time()}
2433        )
2434
2435        if save:
2436            self._save_to_spawn_registry(config)
2437
2438        return None
2439
2440    async def _update_node_desired_state(self, node: str, new_config: dict = None,
2441                                          remove_name: str = None) -> None:
2442        """
2443        Maintain nodes/{node}/desired_state as a retained MQTT message containing
2444        ALL agents that should run on this node. The runner reads this on startup
2445        and reconciles — spawning missing agents, ignoring already-running ones.
2446        """
2447        # Build desired state from spawn registry filtered to this node
2448        reg = self._get_spawn_registry()
2449        agents = {
2450            name: cfg for name, cfg in reg.items()
2451            if cfg.get("node", "").strip() == node
2452        }
2453
2454        # Apply pending change before publishing
2455        if new_config:
2456            agents[new_config["name"]] = new_config
2457        if remove_name:
2458            agents.pop(remove_name, None)
2459
2460        await self._mqtt_publish(
2461            f"nodes/{node}/desired_state",
2462            {"node": node, "agents": list(agents.values()),
2463             "timestamp": __import__("time").time()},
2464            retain=True,
2465            qos=1,
2466        )
2467        logger.info(f"[{self.name}] Desired state for '{node}': {list(agents.keys())}")
2468
2469    # ── Node registry ──────────────────────────────────────────────────────
2470
2471    def list_nodes(self) -> list[dict]:
2472        """Return all known remote nodes with their last-seen time and running agents."""
2473        import time as _time
2474        now = _time.time()
2475        return [
2476            {
2477                "node":      name,
2478                "agents":    info.get("agents", []),
2479                "last_seen": info.get("last_seen", 0),
2480                "online":    (now - info.get("last_seen", 0)) < 30,
2481            }
2482            for name, info in self._known_nodes.items()
2483        ]
2484
2485    def list_topics(self, keyword: str = "") -> list[dict]:
2486        """
2487        Return all known MQTT topics published by agents, optionally filtered by keyword.
2488        Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}
2489
2490        Example:
2491            list_topics("cpu")     → topics containing "cpu"
2492            list_topics("temp")    → topics containing "temp"
2493            list_topics()          → all topics
2494        """
2495        results = []
2496        kw = keyword.lower()
2497        for topic, manifests in self._topic_registry.items():
2498            if kw and kw not in topic.lower():
2499                continue
2500            results.append({
2501                "topic":   topic,
2502                "agents":  [{"name": m.get("name"), "node": m.get("node"),
2503                             "description": m.get("description", "")} for m in manifests],
2504            })
2505        return sorted(results, key=lambda x: x["topic"])
2506
2507    def list_capabilities(self, keyword: str = "") -> list[dict]:
2508        """
2509        Return all known agents with their full capability profile:
2510        name, description, capabilities, input_schema, output_schema.
2511
2512        Example:
2513            list_capabilities()            → all agents
2514            list_capabilities("weather")   → agents with "weather" in description/capabilities
2515        """
2516        results = []
2517        kw = keyword.lower().strip()
2518        # Support multi-word keywords — match if ANY word appears in the haystack
2519        kw_words = kw.split() if kw else []
2520        for name, manifest in self._agent_manifests.items():
2521            desc  = manifest.get("description", "")
2522            caps  = manifest.get("capabilities", [])
2523            # Filter by keyword across description, capabilities, and name
2524            if kw_words:
2525                haystack = desc.lower() + " " + " ".join(caps).lower() + " " + name.lower()
2526                if not any(w in haystack for w in kw_words):
2527                    continue
2528            results.append({
2529                "name":          name,
2530                "node":          manifest.get("node"),
2531                "description":   desc,
2532                "capabilities":  caps,
2533                "input_schema":  manifest.get("input_schema",  {}),
2534                "output_schema": manifest.get("output_schema", {}),
2535                "spawnable":     manifest.get("spawnable", False),
2536                "running":       bool(self._registry and self._registry.find_by_name(name)),
2537            })
2538        return sorted(results, key=lambda x: x["name"])
2539
2540    async def _manifest_listener(self):
2541        """
2542        Subscribe to agents/+/manifest and build a searchable topic registry.
2543        Retained manifests are delivered immediately on subscribe so the registry
2544        is populated even for agents that started before main restarted.
2545        """
2546        try:
2547            import aiomqtt
2548        except ImportError:
2549            return
2550
2551        while self.state.value not in ("stopped", "failed"):
2552            try:
2553                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
2554                    await client.subscribe("agents/+/manifest")
2555                    logger.info("[main] Subscribed to agent manifests.")
2556                    async for msg in client.messages:
2557                        try:
2558                            data = json.loads(msg.payload.decode())
2559                        except Exception:
2560                            continue
2561                        if not isinstance(data, dict):
2562                            continue
2563                        agent_name = data.get("name", "?")
2564                        published  = data.get("publishes", [])
2565                        # Update topic registry
2566                        for topic in published:
2567                            existing = self._topic_registry.setdefault(topic, [])
2568                            # Replace existing entry for this agent or append
2569                            updated = False
2570                            for i, m in enumerate(existing):
2571                                if m.get("name") == agent_name:
2572                                    existing[i] = data
2573                                    updated = True
2574                                    break
2575                            if not updated:
2576                                existing.append(data)
2577                        # Also store full manifest by agent name for capability queries
2578                        self._agent_manifests[agent_name] = data
2579                        logger.debug(f"[main] Manifest from '{agent_name}': {published}")
2580            except asyncio.CancelledError:
2581                break
2582            except Exception as e:
2583                if self.state.value not in ("stopped", "failed"):
2584                    logger.warning(f"[main] Manifest listener error: {e}. Reconnecting in 5s…")
2585                    await asyncio.sleep(5)
2586
2587    async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
2588        """
2589        Move a running agent to a different node.
2590
2591        If the agent is local: saves updated config (with new node) and re-spawns remotely.
2592        If the agent is remote: publishes a migrate command to its current node.
2593        Returns {"success": bool, "message": str}
2594        """
2595        import time as _time
2596
2597        reg = self._get_spawn_registry()
2598        config = reg.get(agent_name)
2599        if not config:
2600            return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."}
2601
2602        current_node = config.get("node", "").strip()
2603
2604        if current_node == target_node:
2605            return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."}
2606
2607        if current_node:
2608            # ── Remote → Remote migration ────────────────────────────────────
2609            logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'")
2610            await self._mqtt_publish(
2611                f"nodes/{current_node}/migrate",
2612                {"name": agent_name, "target_node": target_node},
2613            )
2614        else:
2615            # ── Local → Remote migration ─────────────────────────────────────
2616            logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'")
2617
2618            # Stop the local instance
2619            if self._registry:
2620                local = self._registry.find_by_name(agent_name)
2621                if local:
2622                    try:
2623                        await self._registry.unregister(local.actor_id)
2624                        await local.stop()
2625                        await asyncio.sleep(0.3)
2626                    except Exception as e:
2627                        logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}")
2628
2629            # Update config with new node target and re-spawn remotely
2630            new_config = dict(config)
2631            new_config["node"] = target_node
2632            new_config.pop("replace", None)
2633
2634            await self._spawn_remote(new_config, target_node, save=True)
2635
2636        # Update spawn registry so next restart re-spawns to the right node
2637        updated = dict(config)
2638        updated["node"] = target_node
2639        self._save_to_spawn_registry(updated)
2640
2641        msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' "
2642               f"→ '{target_node}'. It will appear in the dashboard shortly.")
2643        logger.info(f"[{self.name}] {msg}")
2644        return {"success": True, "message": msg}
2645
2646    async def _node_heartbeat_listener(self):
2647        """
2648        Subscribe to nodes/+/heartbeat so main knows which remote nodes are online.
2649        Updates self._known_nodes which is used by list_nodes() and the LLM context.
2650        """
2651        try:
2652            import aiomqtt
2653        except ImportError:
2654            logger.warning("[main] aiomqtt not available — node heartbeat tracking disabled.")
2655            return
2656
2657        while self.state.value not in ("stopped", "failed"):
2658            try:
2659                async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
2660                    await client.subscribe("nodes/+/heartbeat")
2661                    await client.subscribe("nodes/+/migrate_result")
2662                    logger.info("[main] Subscribed to node heartbeats.")
2663                    async for msg in client.messages:
2664                        topic = str(msg.topic)
2665                        try:
2666                            data = json.loads(msg.payload.decode())
2667                        except Exception:
2668                            continue
2669
2670                        parts = topic.split("/")
2671                        if len(parts) < 3:
2672                            continue
2673                        node_name = parts[1]
2674
2675                        if topic.endswith("/heartbeat"):
2676                            import time as _t
2677                            self._known_nodes[node_name] = {
2678                                "last_seen": _t.time(),
2679                                "agents":   data.get("agents", []),
2680                                "node_id":  data.get("node_id", ""),
2681                            }
2682                        elif topic.endswith("/migrate_result"):
2683                            success = data.get("success", False)
2684                            agent   = data.get("agent", "?")
2685                            to_node = data.get("to_node", "?")
2686                            sev     = "info" if success else "warning"
2687                            self._pending_notifications.append({
2688                                "_monitor_notification": True,
2689                                "message": (
2690                                    f"Migration of '{agent}' to '{to_node}' succeeded."
2691                                    if success else
2692                                    f"Migration of '{agent}' failed: {data.get('error', '?')}"
2693                                ),
2694                                "severity": sev,
2695                                "timestamp": __import__("time").time(),
2696                            })
2697
2698            except asyncio.CancelledError:
2699                break
2700            except Exception as e:
2701                if self.state.value not in ("stopped", "failed"):
2702                    logger.warning(f"[main] Node heartbeat listener error: {e}. Reconnecting in 5s…")
2703                    await asyncio.sleep(5)
2704
2705    # ── Delegation ─────────────────────────────────────────────────────────
2706
2707    async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
2708        """
2709        Send a task to the installer agent and wait for the result.
2710        Handles node_deploy, node_install, node_run, install, check actions.
2711        timeout is generous (300s) because deploys involve SSH + pip installs.
2712        """
2713        if not self._registry:
2714            return {"error": "No registry available"}
2715        installer = self._registry.find_by_name("installer")
2716        if not installer:
2717            return {"error": "installer agent not found"}
2718
2719        import uuid as _uuid
2720        task_id = f"inst_{_uuid.uuid4().hex[:8]}"
2721        future: asyncio.Future = asyncio.get_event_loop().create_future()
2722        self._result_futures[task_id] = future
2723
2724        payload = dict(payload)
2725        payload["_task_id"] = task_id
2726        payload["task"]     = task_id
2727
2728        await self.send(installer.actor_id, MessageType.TASK, payload)
2729        try:
2730            return await asyncio.wait_for(future, timeout=timeout)
2731        except asyncio.TimeoutError:
2732            return {"error": f"Installer timed out after {timeout}s"}
2733        finally:
2734            self._result_futures.pop(task_id, None)
2735
2736    async def delegate_task(self, target_name: str, task: str, timeout: float = 60.0) -> Optional[dict]:
2737        if not self._registry:
2738            return None
2739        target = self._registry.find_by_name(target_name)
2740        if not target:
2741            return None
2742        future = asyncio.get_event_loop().create_future()
2743        self._result_futures[task] = future
2744        await self.send(target.actor_id, MessageType.TASK, {"text": task, "reply_to": self.actor_id})
2745        try:
2746            return await asyncio.wait_for(future, timeout=timeout)
2747        except asyncio.TimeoutError:
2748            return None
2749        finally:
2750            self._result_futures.pop(task, None)
2751
2752    async def list_agents(self) -> list[dict]:
2753        if not self._registry:
2754            return []
2755        return [a.get_status() for a in self._registry.all_actors()]
2756
2757    async def send_command(self, target_name: str, command: MessageType):
2758        if not self._registry:
2759            return
2760        target = self._registry.find_by_name(target_name)
2761        if target:
2762            await self.send(target.actor_id, command)
2763
2764    async def delete_spawned_agent(self, name: str):
2765        # Find node before removing from registry
2766        reg = self._get_spawn_registry()
2767        node = reg.get(name, {}).get("node", "").strip()
2768
2769        self._remove_from_spawn_registry(name)
2770
2771        # Update desired state so Pi doesn't re-spawn on reconcile
2772        if node:
2773            await self._update_node_desired_state(node, remove_name=name)
2774            await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1)
2775
2776        if self._registry:
2777            target = self._registry.find_by_name(name)
2778            if target:
2779                await self._registry.unregister(target.actor_id)
2780                await target.stop()

An Actor that uses an LLM to process tasks. Maintains conversation history and supports tool use.

MainActor( llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, **kwargs)
795    def __init__(self, llm_provider: Optional[LLMProvider] = None, **kwargs):
796        kwargs.setdefault("name", "main")
797        kwargs.setdefault("system_prompt", ORCHESTRATOR_PROMPT)
798        super().__init__(llm_provider=llm_provider, **kwargs)
799        self._result_futures: dict[str, asyncio.Future] = {}
800        # Queued monitor notifications — prepended to next user response
801        self._pending_notifications: list[dict] = []
802        self.protected = True
803        # Remote node tracking: node_name → {"last_seen": float, "agents": [...]}
804        self._known_nodes: dict[str, dict] = {}
805        # Topic registry: topic → [manifest, ...] — built from agents/+/manifest
806        self._topic_registry: dict[str, list] = {}  # topic → list of agent manifests
807        self._agent_manifests: dict[str, dict] = {}  # agent name → latest manifest (includes schemas)
# One-line human-readable description of the main orchestrator actor.
DESCRIPTION = 'Main orchestrator: spawns agents, routes tasks, manages the multi-agent system'
# Capability tags advertised for this actor.
CAPABILITIES = ['spawn_agent', 'list_agents', 'list_nodes', 'list_topics', 'orchestration']
# Prompt for the routing classifier: the model must answer with exactly one
# token — ACTUATE, HA, PIPELINE, or OTHER.
# NOTE(review): the OTHER line reads "mixed requests.anything" — a space seems
# to be missing after the period; confirm before changing the prompt text.
INTENT_CLASSIFIER_PROMPT = "You are a routing classifier for a smart home AI assistant.\nRespond with exactly one token: ACTUATE, HA, PIPELINE, or OTHER.\n\nACTUATE = immediate one-shot device control in Home Assistant:\n - Turn on/off a device right now\n - Set temperature, dim lights, lock/unlock door\n - Open/close covers or blinds right now\n - Any direct command whose whole purpose is immediate device control\n\nHA = Home Assistant management, listing, or automation CRUD:\n - List devices, areas, entities, automations\n - Create/edit/delete a HA automation\n - Query what devices or automations exist\n\nPIPELINE = a reactive rule that should run continuously:\n - 'if X happens then do Y' — any conditional/reactive logic\n - 'when X send me a message/notification'\n - 'whenever X turns on/off do Y'\n - Any rule involving a sensor state change triggering an action or notification\n - Any webcam/camera detection triggering anything\n - Anything involving Discord/Telegram notifications triggered by an event\n\nOTHER = general conversation, coding, questions, or mixed requests.anything not HA or pipeline related.\n\nImportant:\n- Choose ACTUATE only when the entire request is immediate device control.\n- If the request mixes device control with non-HA tasks, return OTHER.\n- If the request is about automations, listing, discovery, or CRUD, return HA."
protected
async def on_start(self):
811    async def on_start(self):
812        await super().on_start()
813        await self._restore_spawned_agents()
814        # Listen for remote node heartbeats so we know what's online
815        self._tasks.append(asyncio.create_task(self._node_heartbeat_listener()))
816        # Listen for agent capability manifests to build topic registry
817        self._tasks.append(asyncio.create_task(self._manifest_listener()))
818        # Inject persisted user facts into system prompt
819        self._inject_user_facts_into_prompt()

Called when actor starts. Override for init logic.

def get_pipeline_rules(self) -> dict:
843    def get_pipeline_rules(self) -> dict:
844        return self.recall(PIPELINE_RULES_KEY) or {}
def save_pipeline_rule(self, rule: dict):
846    def save_pipeline_rule(self, rule: dict):
847        rules = self.get_pipeline_rules()
848        rules[rule["rule_id"]] = rule
849        self.persist(PIPELINE_RULES_KEY, rules)
850        logger.info(f"[{self.name}] Pipeline rule saved: {rule['rule_id']} agents={rule.get('agents', [])}")
def get_notification_urls(self) -> dict:
852    def get_notification_urls(self) -> dict:
853        """Return persisted notification webhook URLs (discord, telegram, slack, etc.)"""
854        return self.recall("_notification_urls") or {}

Return persisted notification webhook URLs (discord, telegram, slack, etc.)

def get_user_facts(self) -> dict:
872    def get_user_facts(self) -> dict:
873        return self.recall("_user_facts") or {}
async def delete_pipeline_rule(self, rule_id: str) -> str:
916    async def delete_pipeline_rule(self, rule_id: str) -> str:
917        """Stop all agents for a rule and remove it from registry."""
918        rules = self.get_pipeline_rules()
919        rule = rules.get(rule_id)
920        if not rule:
921            return f"No rule found with id '{rule_id}'."
922        agents = rule.get("agents", [])
923        stopped = []
924        for agent_name in agents:
925            self._remove_from_spawn_registry(agent_name)
926            if self._registry:
927                actor = self._registry.find_by_name(agent_name)
928                if actor:
929                    await actor.stop()
930                    await self._registry.unregister(actor.actor_id)
931                    stopped.append(agent_name)
932        del rules[rule_id]
933        self.persist(PIPELINE_RULES_KEY, rules)
934        task_preview = rule.get("task", "")[:60]
935        return f"Rule '{rule_id}' deleted. Stopped agents: {', '.join(stopped) or 'none running'}.\nRule was: {task_preview}"

Stop all agents for a rule and remove it from registry.

async def handle_message(self, msg: Message):
963    async def handle_message(self, msg: Message):
964        if msg.type == MessageType.TASK:
965            # Intercept monitor notifications BEFORE passing to LLM _handle_task
966            if isinstance(msg.payload, dict) and msg.payload.get("_monitor_notification"):
967                self._pending_notifications.append(msg.payload)
968                logger.info(f"[{self.name}] Monitor alert queued: {msg.payload.get('message','')[:80]}")
969                return
970            await self._handle_task(msg)
971
972        elif msg.type == MessageType.RESULT:
973            if isinstance(msg.payload, dict):
974                # Support both key names: "_task_id" (new) and "task" (legacy)
975                fid = msg.payload.get("_task_id") or msg.payload.get("task")
976                if fid and fid in self._result_futures:
977                    fut = self._result_futures[fid]
978                    if not fut.done():
979                        fut.set_result(msg.payload)

Handle messages not caught by default handlers.

async def chat(self, user_message: str) -> str:
1145    async def chat(self, user_message: str) -> str:
1146        response = await super().chat(user_message)
1147        # Fire-and-forget fact extraction — don't block the response
1148        asyncio.create_task(self._extract_and_save_facts(user_message, response))
1149        return response

Direct async call - useful for the main conversation actor.

async def chat_stream(self, user_message: str):
1151    async def chat_stream(self, user_message: str):
1152        full_response = []
1153        async for chunk in super().chat_stream(user_message):
1154            if isinstance(chunk, dict):
1155                yield chunk
1156            else:
1157                full_response.append(chunk)
1158                yield chunk
1159        # Extract facts from completed response
1160        if full_response:
1161            asyncio.create_task(
1162                self._extract_and_save_facts(user_message, "".join(full_response))
1163            )

Streaming version of chat(). Yields text chunks, then a final usage dict. The caller is responsible for printing chunks as they arrive.

Usage:

    async for chunk in agent.chat_stream("hello"):
        if isinstance(chunk, dict):
            usage = chunk  # final usage summary
        else:
            print(chunk, end="", flush=True)

async def process_user_input(self, text: str) -> str:
1178    async def process_user_input(self, text: str) -> str:
1179        note_prefix = self._drain_notifications()
1180
1181        # ── Direct API intercepts — handle without LLM round-trip ──────────
1182        stripped = text.strip().rstrip("()")
1183
1184        # ── /help ───────────────────────────────────────────────────────────
1185        if stripped in ("/help", "help", "/?"):
1186            return note_prefix + "\n".join([
1187                "**Wactorz commands**",
1188                "",
1189                "**Agents**",
1190                "  /agents               — list all known agents with descriptions and schemas",
1191                "  /agents <keyword>     — filter agents by capability keyword",
1192                "  /capabilities         — alias for /agents",
1193                "  /agents stop <name>   — stop and remove an agent (local or remote)",
1194                "  /agents delete <name> — alias for /agents stop",
1195                "  @agent-name <msg>     — send a message directly to a named agent",
1196                "  @catalog list         — list available catalog recipes",
1197                "  @catalog spawn <n>    — spawn a catalog agent",
1198                "",
1199                "**Nodes**",
1200                "  /nodes                — list remote nodes and their agents",
1201                "  /nodes remove <node>  — stop all agents on a node and remove it",
1202                "",
1203                "**Pipelines**",
1204                "  /rules                — list active pipeline rules",
1205                "  /rules delete <id>    — stop agents and remove a rule",
1206                "",
1207                "**Memory**",
1208                "  /memory               — show stored user facts and conversation summary",
1209                "  /memory clear         — wipe all memory",
1210                "  /memory forget <key>  — remove one stored fact",
1211                "",
1212                "**Notifications**",
1213                "  /webhook discord <url>   — store a Discord webhook URL",
1214                "  /webhook telegram <url>  — store a Telegram webhook URL",
1215                "  /webhook                 — list stored webhook URLs",
1216                "",
1217                "**System**",
1218                "  /nodes                — list remote nodes and their agents",
1219                "  /topics               — list MQTT topics published by known agents",
1220                "  /topics <keyword>     — filter topics by keyword",
1221                "  /bus                  — TopicBus registry: contracts, data flows, wiring pairs",
1222                "  /mqtt                 — MQTT publisher status (connected, queue depth, outbox)",
1223                "  /help                 — show this help",
1224            ])
1225        if stripped in ("main.list_nodes", "list_nodes", "/nodes"):
1226            nodes = self.list_nodes()
1227            if not nodes:
1228                return note_prefix + "No remote nodes seen yet. Deploy one with /deploy <node-name>."
1229            import time as _t
1230            lines = []
1231            for nd in sorted(nodes, key=lambda x: x["node"]):
1232                status   = "🟢 online" if nd["online"] else "🔴 offline"
1233                agents   = ", ".join(nd["agents"]) or "(no agents)"
1234                age      = int(_t.time() - nd["last_seen"])
1235                lines.append(f"  {nd['node']:22s} {status}  |  agents: {agents}  |  last heartbeat: {age}s ago")
1236            return note_prefix + "Remote nodes:\n" + "\n".join(lines) + "\nTo remove a node: /nodes remove <node-name>"
1237
1238        if stripped.startswith("/topics"):
1239            keyword = stripped[7:].strip().lstrip("(").rstrip(")")
1240            topics = self.list_topics(keyword)
1241            if not topics:
1242                msg = f"No topics found" + (f" matching '{keyword}'" if keyword else "") + "."
1243                msg += " Topics are registered automatically when agents publish for the first time."
1244                return note_prefix + msg
1245            lines = [f"Known MQTT topics{' matching ' + repr(keyword) if keyword else ''}:"]
1246            for t in topics:
1247                agent_strs = ", ".join(
1248                    f"{a['name']}" + (f" ({a['node']})" if a.get("node") else "")
1249                    for a in t["agents"]
1250                )
1251                lines.append(f"  {t['topic']:40s}{agent_strs}")
1252            return note_prefix + "\n".join(lines)
1253            
1254        if stripped == "/mqtt":
1255            client = self._mqtt_client
1256            if client is None:
1257                return note_prefix + "MQTT publisher not initialised."
1258            connected   = getattr(client, "connected",   False)
1259            queue_depth = getattr(client, "queue_depth", 0)
1260            client_id   = getattr(client, "_client_id",  "?")
1261            db_path     = getattr(client, "_db_path",    "?")
1262            status_icon = "🟢" if connected else "🔴"
1263            lines = [
1264                f"MQTT Publisher Status:",
1265                f"  {status_icon} connected   : {connected}",
1266                f"  client_id   : {client_id}",
1267                f"  queue_depth : {queue_depth} message(s) pending",
1268                f"  outbox_db   : {db_path}",
1269                f"  QoS 1 topics: nodes/*, agents/by-name/*",
1270                f"  QoS 0 topics: */logs, */metrics, */status, */heartbeat",
1271            ]
1272            if queue_depth > 0:
1273                lines.append(f"  ⚠️  {queue_depth} message(s) queued — will deliver when reconnected")
1274            return note_prefix + "\n".join(lines)
1275
1276        if stripped == "/bus":
1277            try:
1278                from ..core.topic_bus import get_topic_bus
1279                bus = get_topic_bus()
1280                if not bus:
1281                    return note_prefix + "TopicBus not initialised."
1282                summary = bus.registry.summary()
1283                lines = [
1284                    f"TopicBus — Reactive Pub/Sub Registry",
1285                    f"  agents with contracts : {summary['total_agents']}",
1286                    f"  published topics      : {summary['total_published']}",
1287                    f"  subscribed topics     : {summary['total_subscribed']}",
1288                    f"  auto-wiring pairs     : {summary['wiring_pairs']}",
1289                    "",
1290                ]
1291                for c in sorted(summary["agents"], key=lambda x: x["name"]):
1292                    lines.append(f"  [{c['name']}]" + (f" on {c['node']}" if c.get("node") else ""))
1293                    if c["publishes"]:
1294                        lines.append(f"    publishes : {', '.join(c['publishes'])}")
1295                    if c["subscribes"]:
1296                        lines.append(f"    subscribes: {', '.join(c['subscribes'])}")
1297                    if c.get("triggers_when"):
1298                        lines.append(f"    triggers  : {c['triggers_when']}")
1299                pairs = bus.registry.find_wiring_opportunities()
1300                if pairs:
1301                    lines.append("\nAuto-wiring opportunities:")
1302                    for prod, cons, topic in pairs:
1303                        lines.append(f"  {prod.name}{cons.name}  via {topic}")
1304                return note_prefix + "\n".join(lines)
1305            except Exception as e:
1306                return note_prefix + f"TopicBus error: {e}"
1307
1308
1309
1310        # ── Webhook / notification URL management ───────────────────────────
1311        if stripped.startswith("/memory"):
1312            parts = stripped.split(None, 1)
1313            sub = parts[1].strip() if len(parts) > 1 else ""
1314            if sub == "clear":
1315                self.persist("_user_facts", {})
1316                self.persist("history_summary", "")
1317                self._history_summary = ""
1318                self.system_prompt = ORCHESTRATOR_PROMPT
1319                return note_prefix + "Memory cleared — user facts and conversation summary reset."
1320            if sub.startswith("forget "):
1321                key = sub[7:].strip()
1322                facts = self.get_user_facts()
1323                if key in facts:
1324                    del facts[key]
1325                    self.persist("_user_facts", facts)
1326                    self._inject_user_facts_into_prompt()
1327                    return note_prefix + f"Forgotten: '{key}'"
1328                return note_prefix + f"No fact found with key '{key}'."
1329            # Default: show memory
1330            facts = self.get_user_facts()
1331            summary = self._history_summary
1332            lines = []
1333            if facts:
1334                lines.append(f"User facts ({len(facts)}):")
1335                for k, v in facts.items():
1336                    lines.append(f"  {k}: {v}")
1337            else:
1338                lines.append("No user facts stored yet.")
1339            if summary:
1340                lines.append(f"\nConversation summary:\n  {summary[:300]}{'...' if len(summary) > 300 else ''}")
1341            else:
1342                lines.append("\nNo conversation summary yet.")
1343            lines.append("\nCommands: /memory clear | /memory forget <key>")
1344            return note_prefix + "\n".join(lines)
1345
1346        if stripped.startswith("/webhook"):
1347            parts = stripped.split(None, 2)
1348            if len(parts) == 1:
1349                # /webhook — show stored URLs
1350                urls = self.recall("_notification_urls") or {}
1351                if not urls:
1352                    return note_prefix + "No notification URLs stored.\nUse: /webhook discord <url>  or  /webhook telegram <url>"
1353                lines = ["Stored notification URLs:"]
1354                for svc, url in urls.items():
1355                    lines.append(f"  {svc}: {url}")
1356                return note_prefix + "\n".join(lines)
1357            elif len(parts) >= 3:
1358                # /webhook discord <url>
1359                service = parts[1].lower()
1360                url = parts[2].strip()
1361                urls = self.recall("_notification_urls") or {}
1362                urls[service] = url
1363                self.persist("_notification_urls", urls)
1364                return note_prefix + f"Saved {service} webhook URL. Pipelines will use it automatically."
1365            else:
1366                return note_prefix + "Usage: /webhook <service> <url>\nExample: /webhook discord https://discord.com/api/webhooks/..."
1367
1368        # Auto-detect webhook URLs in any message and persist them
1369        import re as _re
1370        _webhook_match = _re.search(
1371            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
1372            text
1373        )
1374        if _webhook_match:
1375            url = _webhook_match.group(0).rstrip(".,;!)'\"")
1376            urls = self.recall("_notification_urls") or {}
1377            if "discord" in url:
1378                urls["discord"] = url
1379            elif "slack" in url:
1380                urls["slack"] = url
1381            elif "telegram" in url:
1382                urls["telegram"] = url
1383            self.persist("_notification_urls", urls)
1384            logger.info(f"[{self.name}] Auto-saved webhook URL from message")
1385
1386        if stripped in ("/rules", "rules"):
1387            rules = self.get_pipeline_rules()
1388            if not rules:
1389                return note_prefix + "No pipeline rules active.\nDescribe a reactive rule to create one, e.g. 'when the door opens send me a Discord message'."
1390            lines = [f"Active pipeline rules ({len(rules)}):"]
1391            for rule_id, rule in sorted(rules.items(), key=lambda x: x[1].get("created_at", 0)):
1392                agents = rule.get("agents", [])
1393                task = rule.get("task", "")[:80]
1394                import datetime
1395                ts = rule.get("created_at", 0)
1396                created = datetime.datetime.fromtimestamp(ts).strftime("%Y-%m-%d %H:%M") if ts else "unknown"
1397                # Check which agents are running
1398                running_agents = []
1399                stopped_agents = []
1400                for a in agents:
1401                    if self._registry and self._registry.find_by_name(a):
1402                        running_agents.append(a)
1403                    else:
1404                        stopped_agents.append(a)
1405                status = "🟢" if running_agents else "🔴"
1406                lines.append(f"\n{status} [{rule_id}] — {task}")
1407                lines.append(f"   agents  : {', '.join(agents)}")
1408                if stopped_agents:
1409                    lines.append(f"   stopped : {', '.join(stopped_agents)}")
1410                lines.append(f"   created : {created}")
1411            lines.append("\nTo delete a rule: /rules delete <rule_id>")
1412            return note_prefix + "\n".join(lines)
1413
1414        if stripped.startswith("/rules delete "):
1415            rule_id = stripped[len("/rules delete "):].strip()
1416            result = await self.delete_pipeline_rule(rule_id)
1417            return note_prefix + result
1418
1419        # ── /agents stop|delete|pause <name> ───────────────────────────────
1420        for _cmd in ("/agents stop ", "/agents delete ", "/agents pause ", "/agents remove "):
1421            if stripped.startswith(_cmd):
1422                agent_name = stripped[len(_cmd):].strip()
1423                reg        = self._get_spawn_registry()
1424                node       = reg.get(agent_name, {}).get("node", "").strip()
1425
1426                # Remove from spawn registry so it doesn't restore on restart
1427                self._remove_from_spawn_registry(agent_name)
1428
1429                if node:
1430                    # Remote agent — publish stop + clear desired state
1431                    await self._update_node_desired_state(node, remove_name=agent_name)
1432                    await self._mqtt_publish(
1433                        f"nodes/{node}/stop", {"name": agent_name}, qos=1
1434                    )
1435                    return note_prefix + f"Stop signal sent to '{agent_name}' on node '{node}'."
1436                else:
1437                    # Local agent
1438                    if self._registry:
1439                        target = self._registry.find_by_name(agent_name)
1440                        if target:
1441                            await self._registry.unregister(target.actor_id)
1442                            await target.stop()
1443                            return note_prefix + f"Agent '{agent_name}' stopped."
1444                    return note_prefix + f"Agent '{agent_name}' not found locally."
1445
1446        # ── /nodes remove <node> ────────────────────────────────────────────
1447        if stripped.startswith("/nodes remove "):
1448            node_name = stripped[len("/nodes remove "):].strip()
1449            # Clear retained MQTT messages
1450            await self._mqtt_publish(f"nodes/{node_name}/spawn",         b"", retain=True)
1451            await self._mqtt_publish(f"nodes/{node_name}/desired_state", b"", retain=True)
1452            await self._mqtt_publish(f"nodes/{node_name}/stop_all",      {"reason": "removed"}, qos=1)
1453            # Remove all agents for this node from spawn registry
1454            reg     = self._get_spawn_registry()
1455            removed = [n for n, c in reg.items() if c.get("node", "") == node_name]
1456            for n in removed:
1457                self._remove_from_spawn_registry(n)
1458            self._known_nodes.pop(node_name, None)
1459            return note_prefix + (
1460                f"Node '{node_name}' removed. "
1461                f"Cleared {len(removed)} agent(s): {', '.join(removed) or 'none'}. "
1462                f"The node will disappear from /nodes within 30s."
1463            )
1464
1465        # ── /agents / /capabilities ─────────────────────────────────────────
1466        if stripped in ("/agents", "/capabilities") or \
1467                stripped.startswith("/agents ") or stripped.startswith("/capabilities "):
1468            keyword = ""
1469            for prefix in ("/capabilities ", "/agents "):
1470                if stripped.startswith(prefix):
1471                    keyword = stripped[len(prefix):].strip()
1472                    break
1473            caps = self.list_capabilities(keyword)
1474            if not caps:
1475                msg = "No agents found" + (f" matching {repr(keyword)}" if keyword else "") + "."
1476                msg += " Agents publish their capabilities on startup."
1477                return note_prefix + msg
1478            lines = ["Agent capabilities" + (" matching " + repr(keyword) if keyword else "") + ":"]
1479            for a in caps:
1480                running  = "\U0001f7e2" if a["running"] else ("\U0001f4e6" if a["spawnable"] else "\U0001f534")
1481                node_str = f" on {a['node']}" if a.get("node") else ""
1482                lines.append("")
1483                lines.append(f"  {running} [{a['name']}]{node_str}")
1484                lines.append(f"    description : {a['description']}")
1485                if a["capabilities"]:
1486                    lines.append(f"    capabilities: {', '.join(a['capabilities'])}")
1487                if a["input_schema"]:
1488                    lines.append(f"    input       : {a['input_schema']}")
1489                if a["output_schema"]:
1490                    lines.append(f"    output      : {a['output_schema']}")
1491                if a["spawnable"]:
1492                    lines.append(f"    spawnable   : yes — @catalog spawn {a['name']}")
1493            lines.append("\nLegend: \U0001f7e2 running  \U0001f4e6 spawnable (not yet running)  \U0001f534 stopped")
1494            lines.append("Filter: /agents <keyword>   e.g. /agents discord")
1495            return note_prefix + "\n".join(lines)
1496
1497                # ── @mention direct routing ─────────────────────────────────────────
1498        if text.startswith("@"):
1499            # Extract agent name and message: "@cpu-monitor-rpi-room what is the cpu?"
1500            parts       = text.split(None, 1)
1501            target_name = parts[0].lstrip("@").rstrip(":,")
1502            message     = parts[1].strip() if len(parts) > 1 else text
1503
1504            # Try local registry first
1505            local_target = self._registry.find_by_name(target_name) if self._registry else None
1506            if not local_target:
1507                # Not running — check if it's a spawnable catalog recipe
1508                manifest = self._agent_manifests.get(target_name, {})
1509                if manifest.get("spawnable") and manifest.get("catalog"):
1510                    catalog_name  = manifest["catalog"]
1511                    catalog_actor = self._registry.find_by_name(catalog_name) if self._registry else None
1512                    if catalog_actor and hasattr(catalog_actor, "_action_spawn"):
1513                        logger.info(f"[main] '{target_name}' not running — auto-spawning via {catalog_name}...")
1514                        try:
1515                            spawn_result = await catalog_actor._action_spawn(target_name, {})
1516                            if spawn_result and spawn_result.get("ok"):
1517                                await asyncio.sleep(0.5)
1518                                local_target = self._registry.find_by_name(target_name) if self._registry else None
1519                                logger.info(f"[main] '{target_name}' spawned, routing task...")
1520                            else:
1521                                err = spawn_result.get("message", "unknown error") if spawn_result else "no response"
1522                                return note_prefix + f"Could not spawn '{target_name}': {err}"
1523                        except Exception as e:
1524                            return note_prefix + f"Could not spawn '{target_name}': {e}"
1525
1526            if local_target:
1527                result = await self.delegate_task(target_name, message, timeout=60.0)
1528                if result:
1529                    reply = result.get("result") or result.get("response") or str(result)
1530                    return note_prefix + f"**{target_name}**: {reply}"
1531                return note_prefix + f"{target_name} did not respond."
1532
1533            # Check if it's a known remote agent
1534            remote_node = None
1535            for node_name, nd in self._known_nodes.items():
1536                if target_name in nd.get("agents", []):
1537                    remote_node = node_name
1538                    break
1539
1540            if remote_node:
1541                # Send via MQTT and wait for reply
1542                import time as _t
1543                reply_topic = f"main/reply/{self.actor_id}/{uuid.uuid4().hex[:8]}"
1544                future: asyncio.Future = asyncio.get_event_loop().create_future()
1545                self._result_futures[reply_topic] = future
1546
1547                await self._mqtt_publish(
1548                    f"agents/by-name/{target_name}/task",
1549                    {"text": message, "_reply_topic": reply_topic,
1550                     "_remote_task": True, "payload": message},
1551                )
1552
1553                # Subscribe briefly for the reply
1554                async def _wait_reply():
1555                    try:
1556                        import aiomqtt
1557                        async with aiomqtt.Client(self._mqtt_broker, self._mqtt_port) as client:
1558                            await client.subscribe(reply_topic)
1559                            async for msg in client.messages:
1560                                try:
1561                                    data = json.loads(msg.payload.decode())
1562                                    if not future.done():
1563                                        future.set_result(data)
1564                                except Exception:
1565                                    pass
1566                                return
1567                    except Exception as e:
1568                        if not future.done():
1569                            future.set_exception(e)
1570
1571                reply_task = asyncio.create_task(_wait_reply())
1572                try:
1573                    result = await asyncio.wait_for(asyncio.shield(future), timeout=30.0)
1574                    reply_task.cancel()
1575                    reply = result.get("result") or result.get("response") or str(result)
1576                    return note_prefix + f"**{target_name}** (on {remote_node}): {reply}"
1577                except asyncio.TimeoutError:
1578                    reply_task.cancel()
1579                    return note_prefix + f"{target_name} on {remote_node} did not respond within 30s."
1580                finally:
1581                    self._result_futures.pop(reply_topic, None)
1582
1583            # Not found locally or remotely
1584            known_remote = [a for nd in self._known_nodes.values() for a in nd.get("agents", [])]
1585            if known_remote:
1586                return note_prefix + (f"Agent '{target_name}' not found. "
1587                    f"Remote agents: {', '.join(known_remote)}")
1588            return note_prefix + f"Agent '{target_name}' not found."
1589
1590        # Explicit planner prefix always wins
1591        lowered = text.lower()
1592        if any(lowered.startswith(p) for p in (
1593            "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
1594            "@planner", "set up a pipeline", "create a rule", "set up a rule",
1595        )):
1596            result = await self._run_planner(text)
1597            return note_prefix + (result or "Planner did not return a result. Please retry.")
1598
1599        # Single LLM call classifies intent: ACTUATE, HA, PIPELINE (reactive rule), OTHER
1600        intent = await self._classify_intent(text)
1601        logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")
1602
1603        if intent == "PIPELINE":
1604            result = await self._run_planner(text)
1605            return note_prefix + (result or "Planner did not return a result. Please retry.")
1606            
1607        if intent == "ACTUATE":
1608            return note_prefix + await self._handle_actuate_intent(text)
1609
1610        if intent == "HA":
1611            result = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
1612            if result and isinstance(result, dict) and result.get("result"):
1613                return note_prefix + str(result["result"])
1614            if not result:
1615                return note_prefix + "I could not reach the Home Assistant agent right now. Please retry."
1616            return note_prefix + "The Home Assistant agent did not return a result. Please retry."
1617
1618        response = await self.chat(text)
1619
1620        # If the LLM wrote agent code but forgot the <spawn> wrapper, remind it once
1621        has_spawn   = "<spawn>" in response
1622        has_code    = "async def handle_task" in response or "async def setup" in response
1623        asked_spawn = any(w in text.lower() for w in ("spawn", "create", "make", "build", "add", "agent"))
1624        if has_code and not has_spawn and asked_spawn:
1625            logger.info(f"[{self.name}] Code written without <spawn> — prompting to wrap it")
1626            response = await self.chat(
1627                "You wrote agent code but forgot to wrap it in a <spawn> block. "
1628                "Please output the complete spawn block now with that exact code inside it. "
1629                "Output ONLY the <spawn>...</spawn> block, nothing else."
1630            )
1631
1632        clean, spawned = await self._process_spawn_commands(response)
1633
1634        # Execute any @agent-name {payload} delegation patterns the LLM produced
1635        clean = await self._execute_llm_delegations(clean)
1636
1637        await self._mqtt_publish(
1638            f"agents/{self.actor_id}/logs",
1639            {"type": "user_interaction", "input": text[:100], "response": clean[:200]},
1640        )
1641
1642        if spawned:
1643            bg_names   = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
1644            live_names = [a.name for a in spawned if not isinstance(a, _SpawnPlaceholder)]
1645            parts = []
1646            if live_names:
1647                replaced = '"replace": true' in response or '"replace":true' in response
1648                action   = "Replaced" if replaced else "Spawned"
1649                parts.append(f"{action} {', '.join(live_names)}")
1650            if bg_names:
1651                parts.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
1652            if parts:
1653                clean += f"\n\n[System: {' | '.join(parts)} — will auto-restore on restart]"
1654
1655        return note_prefix + clean
async def process_user_input_stream(self, text: str):
async def process_user_input_stream(self, text: str):
    """
    Incremental counterpart of process_user_input().

    Yields response text piece by piece and always finishes with one dict:
      {"done": True, "spawned": [...], "system_msg": "..."}
    "spawned" is the list returned by _process_spawn_commands(); it is an
    empty list on every shortcut path (commands, planner, classified intents).

    Meant for the CLI, which prints pieces as they arrive. REST/Discord/WhatsApp
    callers should use process_user_input() instead.
    """
    # Terminal marker for every shortcut path — nothing is spawned on those
    finished = {"done": True, "spawned": [], "system_msg": ""}
    planner_fallback = "Planner did not return a result. Please retry."

    # Pending monitor notifications go out before anything else
    pending_notes = self._drain_notifications()
    if pending_notes:
        yield pending_notes

    # Slash-commands, bare API names and @mentions are implemented once, in
    # process_user_input(); hand them over rather than duplicating that logic
    command = text.strip().rstrip("()")
    if command.startswith(("/", "@")) or command in ("list_nodes", "main.list_nodes", "rules"):
        yield await self.process_user_input(text)
        yield finished
        return

    # An explicit planner prefix skips intent classification entirely
    if text.lower().startswith((
        "coordinate:", "coordinate ", "plan:", "pipeline:", "pipeline ",
        "@planner", "set up a pipeline", "create a rule", "set up a rule",
    )):
        plan = await self._run_planner(text)
        yield plan or planner_fallback
        yield finished
        return

    # One classification call decides between ACTUATE, HA, PIPELINE and the rest
    intent = await self._classify_intent(text)
    logger.info(f"[{self.name}] Intent: {intent}{text[:60]}")

    if intent == "PIPELINE":
        plan = await self._run_planner(text)
        yield plan or planner_fallback
        yield finished
        return

    if intent == "ACTUATE":
        yield await self._handle_actuate_intent(text)
        yield finished
        return

    if intent == "HA":
        ha_reply = await self.delegate_task("home-assistant-agent", text, timeout=120.0)
        if not ha_reply:
            yield "I could not reach the Home Assistant agent right now. Please retry."
        elif isinstance(ha_reply, dict) and ha_reply.get("result"):
            yield str(ha_reply["result"])
        else:
            yield "The Home Assistant agent did not return a result. Please retry."
        yield finished
        return

    # Anything else: relay the LLM output as it is produced
    pieces = []
    async for piece in self.chat_stream(text):
        if isinstance(piece, dict):
            # A dict ends the text stream (the usage record); it is not forwarded
            break
        pieces.append(piece)
        yield piece

    full_response = "".join(pieces)

    # Act on <spawn> blocks contained in the finished response
    _, spawned = await self._process_spawn_commands(full_response)

    # Run @agent-name {payload} delegations written by the LLM; when the text
    # changed, emit only the result lines as one extra piece
    delegated = await self._execute_llm_delegations(full_response)
    if delegated != full_response:
        import re as _re
        outcome_lines = _re.findall(r'[✅❌]\s+\S+.*', delegated)
        if outcome_lines:
            yield "\n" + "\n".join(outcome_lines)
    full_response = delegated

    system_msg = ""
    if spawned:
        # Live actors are reported by name; placeholders are still installing
        names    = ", ".join(f"'{a.name}'" for a in spawned if not isinstance(a, _SpawnPlaceholder))
        bg_names = [a.name for a in spawned if isinstance(a, _SpawnPlaceholder)]
        notes = []
        if names:
            replaced = '"replace": true' in full_response or '"replace":true' in full_response
            verb = "Replaced" if replaced else "Spawned"
            notes.append(f"{verb} {names} — will auto-restore on restart")
        if bg_names:
            notes.append(f"Installing packages for {', '.join(bg_names)} — will appear shortly")
        system_msg = " | ".join(notes)

    await self._mqtt_publish(
        f"agents/{self.actor_id}/logs",
        {"type": "user_interaction", "input": text[:100], "response": full_response[:200]},
    )

    yield {"done": True, "spawned": spawned, "system_msg": system_msg}

Streaming version of process_user_input(). Yields text chunks as the LLM generates them, then a final dict: {"done": True, "spawned": [...names...], "system_msg": "..."}

The CLI calls this and prints chunks immediately. REST/Discord/WhatsApp should use process_user_input() instead.

async def run_pipeline( self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
async def run_pipeline(self, goal: str, agents: list[str], timeout: float = 300.0, force_replan: bool = False) -> dict:
    """
    Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline and
    wait for its final result.

    A future is registered in ``self._result_futures`` under a fresh task id;
    the TaskManager is given that id (``reply_task_id``) together with this
    actor's id (``reply_to_id``). The registration is always removed again,
    including when spawning the TaskManager itself fails.

    Args:
        goal: Natural-language goal handed to the TaskManager.
        agents: Names of the agents the TaskManager may use.
        timeout: Seconds to wait for the result.
        force_replan: Passed through to the TaskManager unchanged.

    Returns:
        The value the future is resolved with, or
        ``{"error": "Pipeline timed out after <timeout>s"}`` on timeout.

    Usage:
        result = await main.run_pipeline(
            goal="Find the Philips EP2220 manual and answer: how do I descale it?",
            agents=["manual-agent", "installer"]
        )
    """
    from .task_manager import TaskManager
    import uuid

    task_id = uuid.uuid4().hex[:8]
    future  = asyncio.get_running_loop().create_future()
    self._result_futures[task_id] = future

    # The outer try covers spawn() as well, so a failed spawn cannot leave a
    # dangling entry in _result_futures.
    try:
        mgr = await self.spawn(
            TaskManager,
            goal=goal,
            available_agents=agents,
            llm_provider=self.llm,
            reply_to_id=self.actor_id,
            reply_task_id=task_id,
            auto_destroy=True,
            force_replan=force_replan,
            cache_dir=str(self._persistence_dir.parent / "plan_cache"),
            persistence_dir=str(self._persistence_dir.parent),
        )

        logger.info(f"[{self.name}] Pipeline started: {mgr.name} for goal: {goal[:60]}")

        # Only a timeout of the wait itself is turned into an error dict;
        # exceptions raised by spawn() still propagate to the caller.
        try:
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            logger.warning(f"[{self.name}] Pipeline timed out after {timeout}s")
            return {"error": f"Pipeline timed out after {timeout}s"}
    finally:
        self._result_futures.pop(task_id, None)

Spawn an ephemeral TaskManager to coordinate a multi-agent pipeline. Returns the final synthesised result without blocking main's context.

Usage: result = await main.run_pipeline( goal="Find the Philips EP2220 manual and answer: how do I descale it?", agents=["manual-agent", "installer"] )

def list_nodes(self) -> list[dict]:
def list_nodes(self) -> list[dict]:
    """
    Describe every remote node recorded in ``self._known_nodes``.

    Each entry has the node name, its agent list (``[]`` if absent), its
    ``last_seen`` timestamp (``0`` if absent) and an ``online`` flag that is
    true when ``last_seen`` is less than 30 seconds before now.
    """
    import time as _time

    now = _time.time()
    nodes = []
    for node_name, info in self._known_nodes.items():
        last_seen = info.get("last_seen", 0)
        nodes.append({
            "node":      node_name,
            "agents":    info.get("agents", []),
            "last_seen": last_seen,
            "online":    (now - last_seen) < 30,
        })
    return nodes

Return all known remote nodes with their last-seen time and running agents.

def list_topics(self, keyword: str = '') -> list[dict]:
def list_topics(self, keyword: str = "") -> list[dict]:
    """
    List the MQTT topics held in ``self._topic_registry``, sorted by topic.

    With a non-empty ``keyword`` only topics containing it (case-insensitive
    substring match) are returned.
    Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}

    Example:
        list_topics("cpu")     → topics containing "cpu"
        list_topics("temp")    → topics containing "temp"
        list_topics()          → all topics
    """
    needle = keyword.lower()
    entries = [
        {
            "topic":  topic,
            "agents": [
                {"name": m.get("name"), "node": m.get("node"),
                 "description": m.get("description", "")}
                for m in manifests
            ],
        }
        for topic, manifests in self._topic_registry.items()
        if not needle or needle in topic.lower()
    ]
    entries.sort(key=lambda entry: entry["topic"])
    return entries

Return all known MQTT topics published by agents, optionally filtered by keyword. Each entry: {"topic": str, "agents": [{"name", "node", "description"}, ...]}

Example: list_topics("cpu") → topics containing "cpu" list_topics("temp") → topics containing "temp" list_topics() → all topics

def list_capabilities(self, keyword: str = '') -> list[dict]:
def list_capabilities(self, keyword: str = "") -> list[dict]:
    """
    Build a capability profile for every agent in ``self._agent_manifests``,
    sorted by agent name.

    Each profile holds: name, node, description, capabilities, input_schema,
    output_schema, spawnable, and running (true when the local registry can
    find an actor with that name).

    ``keyword`` may contain several whitespace-separated words; an agent is
    kept when ANY of them occurs (case-insensitive) in its description,
    capabilities or name.

    Example:
        list_capabilities()            → all agents
        list_capabilities("weather")   → agents with "weather" in description/capabilities
    """
    wanted_words = keyword.lower().strip().split()
    profiles = []
    for agent_name, manifest in self._agent_manifests.items():
        description  = manifest.get("description", "")
        capabilities = manifest.get("capabilities", [])
        if wanted_words:
            # One lower-cased search text: description, capabilities, name
            searchable = " ".join(
                (description.lower(), " ".join(capabilities).lower(), agent_name.lower())
            )
            if all(word not in searchable for word in wanted_words):
                continue
        profiles.append({
            "name":          agent_name,
            "node":          manifest.get("node"),
            "description":   description,
            "capabilities":  capabilities,
            "input_schema":  manifest.get("input_schema",  {}),
            "output_schema": manifest.get("output_schema", {}),
            "spawnable":     manifest.get("spawnable", False),
            "running":       bool(self._registry and self._registry.find_by_name(agent_name)),
        })
    profiles.sort(key=lambda profile: profile["name"])
    return profiles

Return all known agents with their full capability profile: name, description, capabilities, input_schema, output_schema.

Example: list_capabilities() → all agents list_capabilities("weather") → agents with "weather" in description/capabilities

async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
async def migrate_agent(self, agent_name: str, target_node: str) -> dict:
    """
    Move an agent recorded in the spawn registry to a different node.

    * Agent currently on a remote node: a migrate command is published to
      ``nodes/<current_node>/migrate``.
    * Agent currently local (no node in its config): the local instance, if
      one is registered, is unregistered and stopped, then the agent is
      re-spawned on ``target_node`` through ``_spawn_remote``.

    In both cases the spawn-registry entry is rewritten with the new node.

    Args:
        agent_name: Name of the agent as stored in the spawn registry.
        target_node: Destination node name. Surrounding whitespace is ignored,
            matching how the stored node name is read.

    Returns:
        ``{"success": bool, "message": str}``. ``success`` is False when the
        agent is not in the spawn registry or is already on ``target_node``.
    """
    reg = self._get_spawn_registry()
    config = reg.get(agent_name)
    if not config:
        return {"success": False, "message": f"Agent '{agent_name}' not in spawn registry."}

    current_node = config.get("node", "").strip()
    # Normalise the same way as the stored value; otherwise "rpi " would not be
    # recognised as the current node "rpi" and the agent would be migrated
    # onto the node it already runs on.
    target_node = target_node.strip()

    if current_node == target_node:
        return {"success": False, "message": f"Agent '{agent_name}' is already on '{target_node}'."}

    if current_node:
        # ── Remote → Remote migration ────────────────────────────────────
        logger.info(f"[{self.name}] Migrating '{agent_name}' from node '{current_node}' → '{target_node}'")
        await self._mqtt_publish(
            f"nodes/{current_node}/migrate",
            {"name": agent_name, "target_node": target_node},
        )
    else:
        # ── Local → Remote migration ─────────────────────────────────────
        logger.info(f"[{self.name}] Migrating LOCAL agent '{agent_name}' → remote node '{target_node}'")

        # Stop the local instance; a failure here is logged and does not abort
        # the migration
        if self._registry:
            local = self._registry.find_by_name(agent_name)
            if local:
                try:
                    await self._registry.unregister(local.actor_id)
                    await local.stop()
                    await asyncio.sleep(0.3)
                except Exception as e:
                    logger.warning(f"[{self.name}] Could not stop local '{agent_name}': {e}")

        # Re-spawn remotely from a copy of the config that targets the new
        # node and no longer carries the "replace" flag
        new_config = dict(config)
        new_config["node"] = target_node
        new_config.pop("replace", None)

        await self._spawn_remote(new_config, target_node, save=True)

    # Update spawn registry so next restart re-spawns to the right node
    updated = dict(config)
    updated["node"] = target_node
    self._save_to_spawn_registry(updated)

    msg = (f"Migrating '{agent_name}' from '{current_node or 'local'}' "
           f"→ '{target_node}'. It will appear in the dashboard shortly.")
    logger.info(f"[{self.name}] {msg}")
    return {"success": True, "message": msg}

Move a running agent to a different node.

If the agent is local: saves updated config (with new node) and re-spawns remotely. If the agent is remote: publishes a migrate command to its current node. Returns {"success": bool, "message": str}

async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
async def delegate_to_installer(self, payload: dict, timeout: float = 300.0) -> dict:
    """
    Send a task to the installer agent and wait for the result.
    Handles node_deploy, node_install, node_run, install, check actions.
    timeout is generous (300s) because deploys involve SSH + pip installs.

    The request is a copy of ``payload`` with ``_task_id`` and ``task`` both
    set to a fresh ``inst_<hex>`` id; the caller's dict is not modified. A
    future is registered in ``self._result_futures`` under that id and is
    always removed again, including when sending the message fails.

    Args:
        payload: Installer request; copied before the ids are added.
        timeout: Seconds to wait for the installer's reply.

    Returns:
        The value the future is resolved with, or an ``{"error": ...}`` dict
        when there is no registry, no agent named "installer", or no reply
        within ``timeout``.
    """
    if not self._registry:
        return {"error": "No registry available"}
    installer = self._registry.find_by_name("installer")
    if not installer:
        return {"error": "installer agent not found"}

    import uuid as _uuid
    task_id = f"inst_{_uuid.uuid4().hex[:8]}"
    future: asyncio.Future = asyncio.get_running_loop().create_future()
    self._result_futures[task_id] = future

    # Copy so the ids added below do not leak into the caller's dict
    request = dict(payload)
    request["_task_id"] = task_id
    request["task"]     = task_id

    # send() is inside the outer try so that a failed send cannot leave a
    # dangling entry in _result_futures.
    try:
        await self.send(installer.actor_id, MessageType.TASK, request)
        # Only a timeout of the wait itself becomes an error dict; exceptions
        # raised by send() still propagate.
        try:
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            return {"error": f"Installer timed out after {timeout}s"}
    finally:
        self._result_futures.pop(task_id, None)

Send a task to the installer agent and wait for the result. Handles node_deploy, node_install, node_run, install, check actions. timeout is generous (300s) because deploys involve SSH + pip installs.

async def delegate_task(self, target_name: str, task: Any, timeout: float = 60.0):
async def _delegate_task_with_normalized_key(self, target_name: str, task: Any, timeout: float = 60.0):
    """
    Send ``task`` to the locally registered agent ``target_name`` and wait
    for its result.

    The reply future is stored in ``self._result_futures`` under the key
    produced by ``_normalize_delegate_task_key(task)``; the same key is sent
    to the target as the ``task`` field, with the raw task as ``text``. The
    entry is always removed again, including when sending fails.

    NOTE(review): two overlapping calls whose tasks normalize to the same key
    share one slot in ``_result_futures``, so the later call replaces the
    earlier future — confirm callers never overlap this way.

    Args:
        target_name: Name looked up in the local registry.
        task: Task content forwarded to the target.
        timeout: Seconds to wait for the reply.

    Returns:
        The value the future is resolved with, or ``None`` when there is no
        registry, the target is not registered, or the wait times out.
    """
    if not self._registry:
        return None
    target = self._registry.find_by_name(target_name)
    if not target:
        return None

    task_key = _normalize_delegate_task_key(task)
    future = asyncio.get_running_loop().create_future()
    self._result_futures[task_key] = future

    # send() is inside the outer try so that a failed send cannot leave a
    # dangling entry in _result_futures.
    try:
        await self.send(
            target.actor_id,
            MessageType.TASK,
            {"text": task, "task": task_key, "reply_to": self.actor_id},
        )
        # Only a timeout of the wait itself maps to None; exceptions raised
        # by send() still propagate.
        try:
            return await asyncio.wait_for(future, timeout=timeout)
        except asyncio.TimeoutError:
            return None
    finally:
        self._result_futures.pop(task_key, None)

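Send a task to the named local agent and wait up to `timeout` seconds for its result. Returns None when no registry is attached, the agent is not registered, or no result arrives in time.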

async def list_agents(self) -> list[dict]:
async def list_agents(self) -> list[dict]:
    """Return ``get_status()`` of every actor in the registry, or ``[]`` when no registry is attached."""
    registry = self._registry
    if registry:
        return [actor.get_status() for actor in registry.all_actors()]
    return []
async def send_command(self, target_name: str, command: MessageType):
async def send_command(self, target_name: str, command: MessageType):
    """
    Send ``command`` to the registered actor named ``target_name``.

    Does nothing when no registry is attached or no actor has that name.
    """
    registry = self._registry
    target = registry.find_by_name(target_name) if registry else None
    if target:
        await self.send(target.actor_id, command)
async def delete_spawned_agent(self, name: str):
async def delete_spawned_agent(self, name: str):
    """
    Remove agent ``name`` from the spawn registry and stop it.

    If its registry entry names a node, that node's desired state is updated
    and a stop message is published to ``nodes/<node>/stop``. Independently,
    a locally registered actor with that name is unregistered and stopped.
    """
    # Read the node first — the registry entry is deleted right below
    entry = self._get_spawn_registry().get(name, {})
    node = entry.get("node", "").strip()

    self._remove_from_spawn_registry(name)

    if node:
        # Clear the desired state so the node doesn't re-spawn on reconcile
        await self._update_node_desired_state(node, remove_name=name)
        await self._mqtt_publish(f"nodes/{node}/stop", {"name": name}, qos=1)

    registry = self._registry
    if not registry:
        return
    target = registry.find_by_name(name)
    if target:
        await registry.unregister(target.actor_id)
        await target.stop()
class MonitorActor(wactorz.Actor):
 29class MonitorActor(Actor):
 30
 31    def __init__(
 32        self,
 33        check_interval:    float = 15.0,
 34        heartbeat_timeout: float = 60.0,
 35        auto_restart:      bool  = False,
 36        **kwargs,
 37    ):
 38        kwargs.setdefault("name", "monitor")
 39        super().__init__(**kwargs)
 40        self.check_interval    = check_interval
 41        self.heartbeat_timeout = heartbeat_timeout
 42        self.auto_restart      = auto_restart
 43        self.protected         = True
 44
 45        self._last_seen:      dict[str, float] = {}
 46        self._alert_state:    dict[str, bool]  = {}
 47
 48        # Error event registry: actor_id → latest error event dict
 49        self._error_registry: dict[str, dict]  = {}
 50        # Cooldown: actor_id → last time we notified main about it
 51        self._last_notified:  dict[str, float] = {}
 52        # Track which actors we've attempted to restart this session
 53        self._restart_attempts: dict[str, int] = {}
 54
 55    async def on_start(self):
 56        if self._registry:
 57            now = time.time()
 58            for actor in self._registry.all_actors():
 59                if actor.actor_id != self.actor_id:
 60                    self._last_seen[actor.actor_id] = now
 61
 62        self._tasks.append(asyncio.create_task(self._monitor_loop()))
 63        logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")
 64
 65    # ── Message handling ───────────────────────────────────────────────────
 66
 67    async def handle_message(self, msg: Message):
 68        # Heartbeat — any message counts as alive
 69        if msg.sender_id and msg.sender_id != self.actor_id:
 70            self._last_seen[msg.sender_id] = time.time()
 71            if self._alert_state.get(msg.sender_id):
 72                logger.info(f"[{self.name}] Actor {msg.sender_id[:8]} recovered.")
 73                self._alert_state[msg.sender_id] = False
 74
 75        # Structured error event from agents/{id}/errors (routed via MQTT bridge)
 76        if msg.type == MessageType.TASK and isinstance(msg.payload, dict):
 77            if msg.payload.get("_monitor_error_event"):
 78                await self._handle_error_event(msg.payload)
 79
 80    # ── Monitor loop ───────────────────────────────────────────────────────
 81
 82    async def _monitor_loop(self):
 83        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
 84            try:
 85                await asyncio.sleep(self.check_interval)
 86                await self._ping_all_actors()
 87                await self._check_all_actors()
 88                await self._check_error_registry()
 89                await self._publish_system_health()
 90            except asyncio.CancelledError:
 91                break
 92            except Exception as e:
 93                logger.error(f"[{self.name}] Monitor loop error: {e}")
 94
 95    async def _ping_all_actors(self):
 96        if not self._registry:
 97            return
 98        for actor in self._registry.all_actors():
 99            if actor.actor_id != self.actor_id:
100                try:
101                    await self.send(actor.actor_id, MessageType.STATUS_REQUEST, None)
102                except Exception:
103                    pass
104
105    async def _check_all_actors(self):
106        if not self._registry:
107            return
108        now = time.time()
109        for actor in self._registry.all_actors():
110            if actor.actor_id == self.actor_id:
111                continue
112            if actor.actor_id not in self._last_seen:
113                self._last_seen[actor.actor_id] = now
114                continue
115            if actor.state == ActorState.RUNNING:
116                start_age = now - (actor.metrics.start_time or now)
117                if start_age < self.heartbeat_timeout:
118                    self._last_seen[actor.actor_id] = max(
119                        self._last_seen[actor.actor_id], now - start_age
120                    )
121            # Heartbeat fires every 10s — use as secondary liveness signal
122            hb = getattr(actor.metrics, "last_heartbeat", None)
123            if hb and hb > self._last_seen.get(actor.actor_id, 0):
124                self._last_seen[actor.actor_id] = hb
125
126            gap = now - self._last_seen[actor.actor_id]
127            if gap > self.heartbeat_timeout and actor.state == ActorState.RUNNING:
128                if not self._alert_state.get(actor.actor_id):
129                    self._alert_state[actor.actor_id] = True
130                    await self._fire_heartbeat_alert(actor, gap)
131                    if self.auto_restart:
132                        await self._attempt_restart(actor, reason="heartbeat timeout")
133            else:
134                if self._alert_state.get(actor.actor_id) and gap <= self.heartbeat_timeout:
135                    self._alert_state[actor.actor_id] = False
136
137    # ── Error event handling ───────────────────────────────────────────────
138
139    async def _handle_error_event(self, event: dict):
140        """
141        Called when an agent publishes a structured error.
142        Decides: log / restart / escalate to user.
143        """
144        actor_id = event.get("actor_id", "")
145        name     = event.get("name", actor_id[:8])
146        phase    = event.get("phase", "unknown")
147        error    = event.get("error", "")
148        severity = event.get("severity", "warning")
149        fatal    = event.get("fatal", False)
150        degraded = event.get("degraded", False)
151        consec   = event.get("consecutive", 1)
152
153        # Store in registry for health checks
154        self._error_registry[actor_id] = event
155
156        logger.warning(
157            f"[{self.name}] Error event from '{name}': "
158            f"phase={phase} severity={severity} consecutive={consec}"
159        )
160
161        # ── Recovery decision ──────────────────────────────────────────────
162        if fatal:
163            # Bad code / setup failure — restart won't help without a fix
164            msg = (
165                f"**{name}** failed during *{phase}* and cannot run: `{error}`. "
166                f"The agent needs its code fixed before it can be used."
167            )
168            await self._notify_main(actor_id, name, msg, severity="critical")
169            await self._fire_error_alert(event)
170
171        elif severity == "critical" or degraded:
172            # Repeated runtime errors — try a restart
173            actor = self._find_actor(actor_id)
174            if actor and self._restart_attempts.get(actor_id, 0) < 3:
175                self._restart_attempts[actor_id] = self._restart_attempts.get(actor_id, 0) + 1
176                restarted = await self._attempt_restart(actor, reason=f"{phase} error (attempt {self._restart_attempts[actor_id]})")
177                if restarted:
178                    msg = (
179                        f"**{name}** kept crashing in *{phase}* ({consec}x), "
180                        f"so I restarted it. Latest error: `{error}`."
181                    )
182                else:
183                    msg = (
184                        f"**{name}** is crashing repeatedly in *{phase}* "
185                        f"and I couldn't restart it. Error: `{error}`."
186                    )
187            else:
188                attempts = self._restart_attempts.get(actor_id, 0)
189                msg = (
190                    f"**{name}** has failed {consec} times in *{phase}* "
191                    f"(restart attempted {attempts}x). Error: `{error}`. "
192                    f"It may need its code fixed."
193                )
194            await self._notify_main(actor_id, name, msg, severity="critical")
195            await self._fire_error_alert(event)
196
197        else:
198            # Single warning — log and let agent recover on its own
199            await self._fire_error_alert(event)
200
201    async def _check_error_registry(self):
202        """Periodically re-notify main about persistently degraded agents."""
203        now = time.time()
204        for actor_id, event in list(self._error_registry.items()):
205            last = self._last_notified.get(actor_id, 0)
206            if event.get("degraded") and (now - last) > _NOTIFY_COOLDOWN:
207                actor = self._find_actor(actor_id)
208                name  = event.get("name", actor_id[:8])
209                # If agent has recovered (error count reset), clean up registry
210                if actor and hasattr(actor, "_consecutive_errors") and actor._consecutive_errors == 0:
211                    del self._error_registry[actor_id]
212                    await self._notify_main(
213                        actor_id, name,
214                        f"**{name}** has recovered and is running normally again. ✅",
215                        severity="info",
216                    )
217
218    # ── User notification ──────────────────────────────────────────────────
219
220    async def _notify_main(
221        self,
222        actor_id: str,
223        agent_name: str,
224        message: str,
225        severity: str = "warning",
226    ):
227        """
228        Send a structured notification to MainActor so it can relay to the user
229        in natural language during their next interaction (or immediately if idle).
230        """
231        now = time.time()
232        cooldown = self._last_notified.get(actor_id, 0)
233        if (now - cooldown) < _NOTIFY_COOLDOWN and severity != "info":
234            return   # Don't spam
235
236        self._last_notified[actor_id] = now
237
238        if not self._registry:
239            return
240        main = self._registry.find_by_name("main")
241        if not main:
242            return
243
244        try:
245            await self.send(main.actor_id, MessageType.TASK, {
246                "_monitor_notification": True,
247                "agent_name":  agent_name,
248                "message":     message,
249                "severity":    severity,
250                "timestamp":   now,
251            })
252            logger.info(f"[{self.name}] Notified main about '{agent_name}': {message[:80]}")
253        except Exception as e:
254            logger.error(f"[{self.name}] Failed to notify main: {e}")
255
256    # ── Alerting ───────────────────────────────────────────────────────────
257
258    async def _fire_heartbeat_alert(self, actor: Actor, gap: float):
259        alert = {
260            "actor_id":      actor.actor_id,
261            "name":          actor.name,
262            "last_seen_ago": gap,
263            "state":         actor.state.value,
264            "timestamp":     time.time(),
265            "severity":      "warning" if gap < 120 else "critical",
266        }
267        logger.warning(f"[{self.name}] ALERT: {actor.name} unresponsive for {gap:.0f}s")
268        await self._mqtt_publish(f"agents/{actor.actor_id}/alert", alert)
269
270        # Notify main only for user-spawned agents
271        _infra = {"monitor", "installer", "main", "code-agent",
272                  "anomaly-detector", "home-assistant-agent"}
273        if actor.name not in _infra:
274            await self._notify_main(
275                actor.actor_id,
276                actor.name,
277                f"**{actor.name}** has been unresponsive for {gap:.0f}s.",
278                severity="warning",
279            )
280
281    async def _fire_error_alert(self, event: dict):
282        await self._mqtt_publish(
283            f"agents/{event.get('actor_id', 'unknown')}/alert",
284            {
285                "actor_id":  event.get("actor_id"),
286                "name":      event.get("name"),
287                "message":   f"[{event.get('phase')}] {event.get('error')}",
288                "severity":  event.get("severity", "warning"),
289                "timestamp": time.time(),
290            },
291        )
292
293    # ── Restart ────────────────────────────────────────────────────────────
294
295    async def _attempt_restart(self, actor: Actor, reason: str = "") -> bool:
296        logger.info(f"[{self.name}] Restarting '{actor.name}' — reason: {reason}")
297        try:
298            if actor.state != ActorState.STOPPED:
299                await actor.stop()
300                await asyncio.sleep(0.5)
301            await actor.start()
302            self._last_seen[actor.actor_id] = time.time()
303            logger.info(f"[{self.name}] '{actor.name}' restarted successfully.")
304            return True
305        except Exception as e:
306            logger.error(f"[{self.name}] Restart of '{actor.name}' failed: {e}")
307            return False
308
309    # ── Helpers ────────────────────────────────────────────────────────────
310
311    def _find_actor(self, actor_id: str) -> Optional[Actor]:
312        if not self._registry:
313            return None
314        for a in self._registry.all_actors():
315            if a.actor_id == actor_id:
316                return a
317        return None
318
319    async def _publish_system_health(self):
320        if not self._registry:
321            return
322        now    = time.time()
323        actors = self._registry.all_actors()
324        health = {
325            "timestamp":    now,
326            "total_actors": len(actors),
327            "running":  sum(1 for a in actors if a.state == ActorState.RUNNING),
328            "stopped":  sum(1 for a in actors if a.state == ActorState.STOPPED),
329            "failed":   sum(1 for a in actors if a.state == ActorState.FAILED),
330            "degraded": len(self._error_registry),
331            "actors": [
332                {
333                    "id":            a.actor_id,
334                    "name":          a.name,
335                    "state":         a.state.value,
336                    "last_seen_ago": now - self._last_seen.get(a.actor_id, now),
337                    "consecutive_errors": getattr(a, "_consecutive_errors", 0),
338                    "error_phase":        getattr(a, "_error_phase", ""),
339                }
340                for a in actors
341            ],
342        }
343        await self._mqtt_publish("system/health", health)

Base Actor class. All agents inherit from this. Actors are fully async and communicate only through messages.

MonitorActor( check_interval: float = 15.0, heartbeat_timeout: float = 60.0, auto_restart: bool = False, **kwargs)
31    def __init__(
32        self,
33        check_interval:    float = 15.0,
34        heartbeat_timeout: float = 60.0,
35        auto_restart:      bool  = False,
36        **kwargs,
37    ):
38        kwargs.setdefault("name", "monitor")
39        super().__init__(**kwargs)
40        self.check_interval    = check_interval
41        self.heartbeat_timeout = heartbeat_timeout
42        self.auto_restart      = auto_restart
43        self.protected         = True
44
45        self._last_seen:      dict[str, float] = {}
46        self._alert_state:    dict[str, bool]  = {}
47
48        # Error event registry: actor_id → latest error event dict
49        self._error_registry: dict[str, dict]  = {}
50        # Cooldown: actor_id → last time we notified main about it
51        self._last_notified:  dict[str, float] = {}
52        # Track which actors we've attempted to restart this session
53        self._restart_attempts: dict[str, int] = {}
check_interval
heartbeat_timeout
auto_restart
protected
async def on_start(self):
55    async def on_start(self):
56        if self._registry:
57            now = time.time()
58            for actor in self._registry.all_actors():
59                if actor.actor_id != self.actor_id:
60                    self._last_seen[actor.actor_id] = now
61
62        self._tasks.append(asyncio.create_task(self._monitor_loop()))
63        logger.info(f"[{self.name}] Monitor started. check_interval={self.check_interval}s")

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
67    async def handle_message(self, msg: Message):
68        # Heartbeat — any message counts as alive
69        if msg.sender_id and msg.sender_id != self.actor_id:
70            self._last_seen[msg.sender_id] = time.time()
71            if self._alert_state.get(msg.sender_id):
72                logger.info(f"[{self.name}] Actor {msg.sender_id[:8]} recovered.")
73                self._alert_state[msg.sender_id] = False
74
75        # Structured error event from agents/{id}/errors (routed via MQTT bridge)
76        if msg.type == MessageType.TASK and isinstance(msg.payload, dict):
77            if msg.payload.get("_monitor_error_event"):
78                await self._handle_error_event(msg.payload)

Handle messages not caught by default handlers.

CodeAgent
class ManualAgent(wactorz.Actor):
 37class ManualAgent(Actor):
 38    """
 39    Pre-defined agent that finds, downloads, and answers questions from device manuals.
 40    Requires: httpx  (+ pdfplumber or pymupdf for PDF extraction)
 41    """
 42
 43    def __init__(self, llm_provider=None, **kwargs):
 44        kwargs.setdefault("name", "manual-agent")
 45        super().__init__(**kwargs)
 46        self.llm              = llm_provider
 47        self._manual_text:    Optional[str]  = None
 48        self._manual_device:  Optional[str]  = None
 49        self._manual_url:     Optional[str]  = None
 50        self._manual_pages:   int            = 0
 51
 52    def _current_task_description(self) -> str:
 53        if self._manual_device:
 54            return f"loaded: {self._manual_device}"
 55        return "idle — no manual loaded"
 56
 57    async def on_start(self):
 58        await self._mqtt_publish(
 59            f"agents/{self.actor_id}/logs",
 60            {"type": "log", "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.", "timestamp": time.time()},
 61        )
 62        logger.info(f"[{self.name}] Ready.")
 63
 64    # ── Direct chat() entry point (used by CLIInterface) ───────────────────
 65
 66    async def chat(self, message: str) -> str:
 67        """
 68        Synchronous-style entry point for CLIInterface and other direct callers.
 69        Parses the message as JSON payload or plain-text question, executes the
 70        action, and returns a human-readable string response.
 71        """
 72        payload = None
 73        stripped = message.strip()
 74        if stripped.startswith("{"):
 75            try:
 76                payload = json.loads(stripped)
 77            except json.JSONDecodeError:
 78                pass
 79
 80        if payload and isinstance(payload, dict):
 81            result = await self._handle_task_payload(payload)
 82        else:
 83            if self._manual_text:
 84                result = await self._ask(stripped)
 85            else:
 86                result = {
 87                    "error": "No manual loaded yet.",
 88                    "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
 89                }
 90
 91        return self._format_result(result)
 92
 93    def _format_result(self, result: dict) -> str:
 94        """Turn a result dict into a readable string for chat output."""
 95        if "error" in result:
 96            msg = result["error"]
 97            hint = result.get("hint", "")
 98            return f"[error] {msg}\n{hint}".strip()
 99
100        if "answer" in result:
101            return result["answer"]
102
103        if result.get("success"):
104            return (
105                f"Manual loaded: {result.get('device', '?')}\n"
106                f"  URL:   {result.get('url', '?')}\n"
107                f"  Pages: {result.get('pages', '?')}\n"
108                f"  Chars: {result.get('chars', '?'):,}\n"
109                f"  Preview: {result.get('preview', '')[:200]}"
110            )
111
112        if "status" in result:
113            if result["status"] == "cleared":
114                return "Manual cleared."
115            if result["status"] == "loaded":
116                return (
117                    f"Loaded: {result.get('device', '?')} "
118                    f"({result.get('pages', '?')} pages, {result.get('chars', '?'):,} chars)"
119                )
120            return result.get("message", str(result))
121
122        return str(result)
123
124    # ── Message-based entry point (actor mailbox) ──────────────────────────
125
126    async def handle_message(self, msg: Message):
127        if msg.type == MessageType.TASK:
128            try:
129                result = await self._handle_task(msg)
130            except Exception as e:
131                logger.error(f"[{self.name}] Task handling failed: {e}", exc_info=True)
132                result = {"error": f"Internal error: {e}"}
133
134            target = msg.reply_to or msg.sender_id
135            if target:
136                await self.send(target, MessageType.RESULT, result)
137            else:
138                logger.warning(
139                    f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
140                    f"sender_id={msg.sender_id!r}). Result discarded: {result}"
141                )
142
143    async def _handle_task(self, msg: Message) -> dict:
144        payload = msg.payload if isinstance(msg.payload, dict) else {}
145        if not isinstance(msg.payload, dict):
146            text = str(msg.payload).strip()
147            if text:
148                return await self._ask(text)
149            return {"error": "Send a dict payload with 'action' key"}
150
151        return await self._handle_task_payload(payload)
152
153    async def _handle_task_payload(self, payload: dict) -> dict:
154        """Core task dispatcher — shared by both chat() and handle_message()."""
155        action = payload.get("action", "").lower()
156
157        if action == "load_manual":
158            device = payload.get("device") or payload.get("query", "")
159            if not device:
160                return {"error": "Missing 'device' field"}
161            return await self._load_manual(device)
162
163        if action == "ask":
164            question = payload.get("question") or payload.get("query") or payload.get("text", "")
165            if not question:
166                return {"error": "Missing 'question' field"}
167            return await self._ask(question)
168
169        if action == "status":
170            return self._status()
171
172        if action == "clear":
173            self._manual_text   = None
174            self._manual_device = None
175            self._manual_url    = None
176            self._manual_pages  = 0
177            return {"status": "cleared"}
178
179        if "question" in payload or "query" in payload:
180            return await self._ask(payload.get("question") or payload.get("query", ""))
181
182        return {
183            "error": f"Unknown action: '{action}'",
184            "supported": ["load_manual", "ask", "status", "clear"],
185        }
186
187    # ── Load manual ────────────────────────────────────────────────────────
188
189    async def _load_manual(self, device: str) -> dict:
190        await self._log(f"Searching for manual: {device}")
191
192        loop    = asyncio.get_event_loop()
193        pdf_url = await loop.run_in_executor(None, lambda: self._search_for_manual(device))
194
195        if not pdf_url:
196            await self._alert(f"No PDF manual found for: {device}", "warning")
197            return {"error": f"Could not find a PDF manual for: {device}"}
198
199        await self._log(f"Found: {pdf_url}")
200
201        pdf_bytes = await self._download_pdf(pdf_url)
202        if not pdf_bytes:
203            return {"error": f"Failed to download PDF from: {pdf_url}"}
204
205        size_kb = len(pdf_bytes) // 1024
206        await self._log(f"Downloaded {size_kb} KB — extracting text...")
207
208        text, pages = await loop.run_in_executor(None, lambda: self._extract_text(pdf_bytes))
209        if not text:
210            return {"error": "PDF has no extractable text (may be a scanned image PDF)."}
211
212        self._manual_text   = text
213        self._manual_device = device
214        self._manual_url    = pdf_url
215        self._manual_pages  = pages
216
217        await self._log(f"Manual loaded: {device}{pages} pages, {len(text):,} chars")
218        await self._publish_status()
219
220        return {
221            "success": True,
222            "device":  device,
223            "url":     pdf_url,
224            "pages":   pages,
225            "chars":   len(text),
226            "preview": text[:300].replace("\n", " ").strip(),
227        }
228
229    # ── Search ─────────────────────────────────────────────────────────────
230
231    def _search_for_manual(self, device: str) -> Optional[str]:
232        try:
233            import httpx
234        except ImportError:
235            logger.error(f"[{self.name}] httpx is not installed — cannot search for manuals")
236            return None
237
238        headers = {
239            "User-Agent":      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36",
240            "Accept-Language": "en-US,en;q=0.9",
241            "Accept":          "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
242        }
243
244        # ── Pass 1: direct Philips document server (model number pattern) ──
245        model_m = re.search(r'EP\d{4}', device, re.IGNORECASE)
246        if model_m:
247            model = model_m.group(0).upper()
248            ml    = model.lower()
249            direct_urls = [
250                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_pss_aenghk.pdf",
251                f"https://www.download.p4c.philips.com/files/e/{ml}_31/{ml}_31_pss_aenghk.pdf",
252                f"https://www.download.p4c.philips.com/files/e/{ml}/{ml}_user_manual_en.pdf",
253                f"https://www.documents.philips.com/doclib/enc/fetch/2000/4504/261257/261271/User_Manual_{model}.pdf",
254            ]
255            try:
256                with httpx.Client(follow_redirects=True, timeout=10, headers=headers) as client:
257                    for url in direct_urls:
258                        try:
259                            r = client.head(url)
260                            ct = r.headers.get("content-type", "")
261                            if r.status_code == 200 and ("pdf" in ct or url.endswith(".pdf")):
262                                logger.info(f"[{self.name}] Direct URL works: {url}")
263                                return url
264                        except Exception as e:
265                            logger.debug(f"[{self.name}] Direct URL failed ({url}): {e}")
266                            continue
267            except Exception as e:
268                logger.warning(f"[{self.name}] Philips direct check failed: {e}")
269
270        # ── Pass 2: DDGS search ────────────────────────────────────────────
271        result = self._search_ddgs(device)
272        if result:
273            return result
274
275        # ── Pass 3: Bing scrape (with redirect URL decoding) ───────────────
276        result = self._search_bing_scrape(device, headers)
277        if result:
278            return result
279
280        # ── Pass 4: Google scrape fallback ─────────────────────────────────
281        result = self._search_google_scrape(device, headers)
282        if result:
283            return result
284
285        logger.warning(f"[{self.name}] All search passes exhausted — no manual found for: {device}")
286        return None
287
288    # ── Pass 2: DDGS ──────────────────────────────────────────────────────
289
290    def _search_ddgs(self, device: str) -> Optional[str]:
291        queries = [
292            f"{device} user manual filetype:pdf",
293            f"{device} user manual PDF manualslib OR manualzz",
294            f"{device} owner manual PDF download",
295        ]
296
297        def get_url(r):
298            return r.get("href") or r.get("url") or r.get("link") or ""
299
300        try:
301            try:
302                from ddgs import DDGS
303                logger.info(f"[{self.name}] Pass 2: using ddgs package")
304            except ImportError:
305                from duckduckgo_search import DDGS
306                logger.info(f"[{self.name}] Pass 2: using duckduckgo_search (deprecated)")
307
308            with DDGS() as ddgs:
309                for query in queries:
310                    try:
311                        results = list(ddgs.text(query, max_results=15))
312                        logger.info(f"[{self.name}] Pass 2 query: {query!r}{len(results)} results")
313
314                        for i, r in enumerate(results[:5]):
315                            logger.info(
316                                f"[{self.name}]   [{i}] url={get_url(r)!r} "
317                                f"title={r.get('title', '')[:60]!r}"
318                            )
319
320                        match = self._pick_best_url(results, get_url)
321                        if match:
322                            logger.info(f"[{self.name}] Pass 2 HIT: {match}")
323                            return match
324
325                    except Exception as e:
326                        logger.warning(f"[{self.name}] DDGS query failed ({query}): {e}")
327                        continue
328        except ImportError:
329            logger.warning(f"[{self.name}] Neither ddgs nor duckduckgo_search installed — skipping")
330
331        return None
332
333    # ── Pass 3: Bing scrape ───────────────────────────────────────────────
334
335    def _search_bing_scrape(self, device: str, headers: dict) -> Optional[str]:
336        import httpx
337
338        queries = [
339            f"{device} user manual PDF",
340            f"{device} manual PDF manualslib OR manualzz",
341        ]
342
343        try:
344            with httpx.Client(follow_redirects=True, timeout=15, headers=headers) as client:
345                for query in queries:
346                    try:
347                        url  = "https://www.bing.com/search?q=" + urllib.parse.quote(query)
348                        r    = client.get(url)
349                        urls = self._extract_bing_urls(r.text)
350
351                        logger.info(f"[{self.name}] Pass 3 query: {query!r}{len(urls)} real URLs")
352                        for i, u in enumerate(urls[:10]):
353                            logger.info(f"[{self.name}]   [{i}] {u}")
354
355                        # Build fake result dicts so we can reuse _pick_best_url
356                        results = [{"href": u, "title": "", "body": ""} for u in urls]
357                        match   = self._pick_best_url(results, lambda r: r["href"])
358                        if match:
359                            logger.info(f"[{self.name}] Pass 3 HIT: {match}")
360                            return match
361
362                    except Exception as e:
363                        logger.warning(f"[{self.name}] Bing query failed ({query}): {e}")
364                        continue
365        except Exception as e:
366            logger.warning(f"[{self.name}] Bing scrape failed entirely: {e}")
367
368        return None
369
370    # ── Pass 4: Google scrape ─────────────────────────────────────────────
371
372    def _search_google_scrape(self, device: str, headers: dict) -> Optional[str]:
373        import httpx
374
375        queries = [
376            f"{device} user manual PDF",
377            f"{device} manual filetype:pdf",
378        ]
379
380        try:
381            with httpx.Client(follow_redirects=True, timeout=15, headers=headers) as client:
382                for query in queries:
383                    try:
384                        url = "https://www.google.com/search?q=" + urllib.parse.quote(query)
385                        r   = client.get(url)
386                        urls = self._extract_google_urls(r.text)
387
388                        logger.info(f"[{self.name}] Pass 4 query: {query!r}{len(urls)} real URLs")
389                        for i, u in enumerate(urls[:10]):
390                            logger.info(f"[{self.name}]   [{i}] {u}")
391
392                        results = [{"href": u, "title": "", "body": ""} for u in urls]
393                        match   = self._pick_best_url(results, lambda r: r["href"])
394                        if match:
395                            logger.info(f"[{self.name}] Pass 4 HIT: {match}")
396                            return match
397
398                    except Exception as e:
399                        logger.warning(f"[{self.name}] Google query failed ({query}): {e}")
400                        continue
401        except Exception as e:
402            logger.warning(f"[{self.name}] Google scrape failed entirely: {e}")
403
404        return None
405
406    # ── URL extraction helpers ─────────────────────────────────────────────
407
408    @staticmethod
409    def _extract_bing_urls(html: str) -> list[str]:
410        """
411        Extract real destination URLs from Bing search results HTML.
412        Bing wraps links as /ck/a?...&u=a1<base64url>...  — we decode those.
413        Also picks up any direct href links that aren't bing/microsoft.
414        """
415        urls = []
416        seen = set()
417
418        # Method 1: decode Bing redirect URLs  (/ck/a?...u=a1<base64>...)
419        for m in re.finditer(r'href="https?://www\.bing\.com/ck/a\?[^"]*?u=a1([A-Za-z0-9_-]+)[^"]*"', html):
420            try:
421                encoded = m.group(1)
422                # Fix base64url padding
423                padded  = encoded + "=" * (4 - len(encoded) % 4)
424                decoded = base64.urlsafe_b64decode(padded).decode("utf-8", errors="ignore")
425                if decoded.startswith("http") and decoded not in seen:
426                    seen.add(decoded)
427                    urls.append(decoded)
428            except Exception:
429                continue
430
431        # Method 2: direct hrefs that aren't search engine domains
432        for m in re.finditer(r'href=["\'](https?://[^"\'<>\s]+)', html):
433            link = m.group(1)
434            if not any(d in link for d in _SEARCH_ENGINE_DOMAINS) and link not in seen:
435                seen.add(link)
436                urls.append(link)
437
438        return urls
439
440    @staticmethod
441    def _extract_google_urls(html: str) -> list[str]:
442        """
443        Extract real destination URLs from Google search results HTML.
444        Google wraps links as /url?q=<url>&... — we extract the q parameter.
445        """
446        urls = []
447        seen = set()
448
449        # Method 1: Google redirect links
450        for m in re.finditer(r'/url\?q=(https?://[^&"]+)', html):
451            try:
452                decoded = urllib.parse.unquote(m.group(1))
453                if not any(d in decoded for d in _SEARCH_ENGINE_DOMAINS) and decoded not in seen:
454                    seen.add(decoded)
455                    urls.append(decoded)
456            except Exception:
457                continue
458
459        # Method 2: direct hrefs
460        for m in re.finditer(r'href=["\'](https?://[^"\'<>\s]+)', html):
461            link = m.group(1)
462            if not any(d in link for d in _SEARCH_ENGINE_DOMAINS) and link not in seen:
463                seen.add(link)
464                urls.append(link)
465
466        return urls
467
468    # ── Shared URL ranking ─────────────────────────────────────────────────
469
470    def _pick_best_url(self, results: list[dict], get_url_fn) -> Optional[str]:
471        """
472        From a list of search results, pick the best manual URL.
473        Priority: direct .pdf link > trusted site > any link with 'manual' + 'pdf' signals.
474        """
475        # Tier 1: direct .pdf link
476        for r in results:
477            u = get_url_fn(r)
478            if u.lower().endswith(".pdf"):
479                return u
480
481        # Tier 2: trusted manual site
482        for r in results:
483            u = get_url_fn(r)
484            if any(t in u for t in TRUSTED_SITES):
485                # ManualsLib pages need /download.pdf appended
486                if "manualslib.com" in u and not u.endswith(".pdf"):
487                    return u.rstrip("/") + "/download.pdf"
488                return u
489
490        # Tier 3: URL contains 'manual' or 'pdf' (but not a search engine)
491        for r in results:
492            u = get_url_fn(r)
493            u_lower = u.lower()
494            if u.startswith("http") and ("manual" in u_lower or "pdf" in u_lower):
495                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
496                    return u
497
498        # Tier 4: body/title mentions 'pdf' or 'manual'
499        for r in results:
500            u = get_url_fn(r)
501            text = (r.get("body", "") + r.get("title", "")).lower()
502            if ("pdf" in text or "manual" in text) and u.startswith("http"):
503                if not any(d in u for d in _SEARCH_ENGINE_DOMAINS):
504                    return u
505
506        return None
507
508    # ── Download ───────────────────────────────────────────────────────────
509
510    async def _download_pdf(self, url: str) -> Optional[bytes]:
511        try:
512            import httpx
513        except ImportError:
514            logger.error(f"[{self.name}] httpx is not installed — cannot download PDF")
515            return None
516
517        headers = {
518            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 Chrome/120.0.0.0 Safari/537.36"
519        }
520        try:
521            async with httpx.AsyncClient(follow_redirects=True, timeout=60, headers=headers) as client:
522                resp = await client.get(url)
523                if resp.status_code != 200:
524                    logger.warning(f"[{self.name}] Download returned status {resp.status_code} for: {url}")
525                    return None
526                ct = resp.headers.get("content-type", "")
527                if "pdf" in ct or resp.content[:4] == b"%PDF":
528                    return resp.content
529                # HTML — hunt for embedded PDF link
530                links = re.findall(r'https?://[^\s"\'<>]+\.pdf', resp.text, re.IGNORECASE)
531                if links:
532                    logger.info(f"[{self.name}] Following embedded PDF link: {links[0]}")
533                    r2 = await client.get(links[0])
534                    if r2.status_code == 200 and r2.content[:4] == b"%PDF":
535                        return r2.content
536                logger.warning(f"[{self.name}] URL did not return a PDF: {url} (content-type: {ct})")
537        except Exception as e:
538            logger.warning(f"[{self.name}] Download failed for {url}: {e}")
539        return None
540
541    # ── Extract text ───────────────────────────────────────────────────────
542
543    def _extract_text(self, pdf_bytes: bytes) -> tuple[str, int]:
544        import io
545        try:
546            import pdfplumber
547            parts = []
548            with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
549                pages = len(pdf.pages)
550                for p in pdf.pages:
551                    t = p.extract_text()
552                    if t:
553                        parts.append(t)
554            if parts:
555                return "\n".join(parts), pages
556        except ImportError:
557            logger.warning(f"[{self.name}] pdfplumber not installed — trying pymupdf")
558        except Exception as e:
559            logger.warning(f"[{self.name}] pdfplumber extraction failed: {e}")
560
561        try:
562            import fitz
563            doc   = fitz.open(stream=pdf_bytes, filetype="pdf")
564            parts = [p.get_text() for p in doc]
565            return "\n".join(t for t in parts if t), len(doc)
566        except ImportError:
567            logger.error(f"[{self.name}] Neither pdfplumber nor pymupdf (fitz) installed — cannot extract text")
568        except Exception as e:
569            logger.warning(f"[{self.name}] pymupdf extraction failed: {e}")
570
571        return "", 0
572
573    # ── Ask ────────────────────────────────────────────────────────────────
574
575    async def _ask(self, question: str) -> dict:
576        if not self._manual_text:
577            return {
578                "error":  "No manual loaded yet.",
579                "hint":   'Send: {"action": "load_manual", "device": "Your Device Model"}',
580            }
581        if not self.llm:
582            return {"error": "No LLM configured on this agent."}
583
584        await self._log(f"Answering: {question}")
585
586        chunks  = self._chunk_text(self._manual_text, 600, 100)
587        ranked  = self._rank_chunks(chunks, question)[:6]
588        context = "\n\n---\n\n".join(ranked)
589
590        prompt = (
591            f"You are a helpful assistant. Answer the question below using ONLY the provided manual excerpt.\n\n"
592            f"Device: {self._manual_device}\n\n"
593            f"Manual excerpt:\n{context[:6000]}\n\n"
594            f"Question: {question}\n\n"
595            f"Give a clear, step-by-step answer based on the manual. "
596            f"If the manual doesn't contain the answer, say so."
597        )
598
599        if hasattr(self.llm, "complete"):
600            response, _ = await self.llm.complete(
601                messages=[{"role": "user", "content": prompt}],
602                system="You answer questions strictly based on provided manual content.",
603            )
604        else:
605            response = str(self.llm)
606
607        return {
608            "device":   self._manual_device,
609            "question": question,
610            "answer":   response,
611        }
612
613    # ── Status ─────────────────────────────────────────────────────────────
614
615    def _status(self) -> dict:
616        if not self._manual_device:
617            return {"status": "idle", "message": "No manual loaded."}
618        return {
619            "status":  "loaded",
620            "device":  self._manual_device,
621            "url":     self._manual_url,
622            "pages":   self._manual_pages,
623            "chars":   len(self._manual_text or ""),
624        }
625
626    # ── Helpers ────────────────────────────────────────────────────────────
627
628    _STOPWORDS = {
629        'how','do','i','the','a','an','is','are','what','where','when','why',
630        'can','does','to','for','of','in','on','at','my','this','that','it',
631        'its','with','and','or','be','was','will','has','have','use','using',
632        'get','me','please','tell','about','there','their','they','we','you',
633        'your','which','make','need',
634    }
635
636    def _keywords(self, text: str) -> list[str]:
637        words = re.findall(r'[a-z]+', text.lower())
638        return [w for w in words if w not in self._STOPWORDS and len(w) > 2]
639
640    def _chunk_text(self, text: str, chunk_size=600, overlap=100) -> list[str]:
641        words  = text.split()
642        chunks = []
643        i = 0
644        while i < len(words):
645            chunks.append(" ".join(words[i:i + chunk_size]))
646            i += chunk_size - overlap
647        return chunks
648
649    def _rank_chunks(self, chunks: list[str], question: str) -> list[str]:
650        kws    = self._keywords(question)
651        scored = [(sum(c.lower().count(kw) for kw in kws), c) for c in chunks]
652        scored.sort(key=lambda x: x[0], reverse=True)
653        return [c for _, c in scored]
654
655    # ── MQTT helpers ───────────────────────────────────────────────────────
656
657    async def _log(self, msg: str):
658        logger.info(f"[{self.name}] {msg}")
659        await self._mqtt_publish(
660            f"agents/{self.actor_id}/logs",
661            {"type": "log", "message": msg, "timestamp": time.time()},
662        )
663
664    async def _alert(self, msg: str, severity: str = "warning"):
665        logger.warning(f"[{self.name}] ALERT: {msg}")
666        await self._mqtt_publish(
667            f"agents/{self.actor_id}/alerts",
668            {"message": msg, "severity": severity, "timestamp": time.time()},
669        )

Pre-defined agent that finds, downloads, and answers questions from device manuals. Requires: httpx (+ pdfplumber or pymupdf for PDF extraction)

ManualAgent(llm_provider=None, **kwargs)
43    def __init__(self, llm_provider=None, **kwargs):
44        kwargs.setdefault("name", "manual-agent")
45        super().__init__(**kwargs)
46        self.llm              = llm_provider
47        self._manual_text:    Optional[str]  = None
48        self._manual_device:  Optional[str]  = None
49        self._manual_url:     Optional[str]  = None
50        self._manual_pages:   int            = 0
llm
async def on_start(self):
57    async def on_start(self):
58        await self._mqtt_publish(
59            f"agents/{self.actor_id}/logs",
60            {"type": "log", "message": "Manual agent ready. Send {action: load_manual, device: ...} to begin.", "timestamp": time.time()},
61        )
62        logger.info(f"[{self.name}] Ready.")

Called when actor starts. Override for init logic.

async def chat(self, message: str) -> str:
66    async def chat(self, message: str) -> str:
67        """
68        Synchronous-style entry point for CLIInterface and other direct callers.
69        Parses the message as JSON payload or plain-text question, executes the
70        action, and returns a human-readable string response.
71        """
72        payload = None
73        stripped = message.strip()
74        if stripped.startswith("{"):
75            try:
76                payload = json.loads(stripped)
77            except json.JSONDecodeError:
78                pass
79
80        if payload and isinstance(payload, dict):
81            result = await self._handle_task_payload(payload)
82        else:
83            if self._manual_text:
84                result = await self._ask(stripped)
85            else:
86                result = {
87                    "error": "No manual loaded yet.",
88                    "hint": 'Send: {"action": "load_manual", "device": "Your Device Model"}',
89                }
90
91        return self._format_result(result)

Synchronous-style entry point for CLIInterface and other direct callers. Parses the message as JSON payload or plain-text question, executes the action, and returns a human-readable string response.

async def handle_message(self, msg: Message):
126    async def handle_message(self, msg: Message):
127        if msg.type == MessageType.TASK:
128            try:
129                result = await self._handle_task(msg)
130            except Exception as e:
131                logger.error(f"[{self.name}] Task handling failed: {e}", exc_info=True)
132                result = {"error": f"Internal error: {e}"}
133
134            target = msg.reply_to or msg.sender_id
135            if target:
136                await self.send(target, MessageType.RESULT, result)
137            else:
138                logger.warning(
139                    f"[{self.name}] No reply target (reply_to={msg.reply_to!r}, "
140                    f"sender_id={msg.sender_id!r}). Result discarded: {result}"
141                )

Handle messages not caught by default handlers.

class PlannerAgent(wactorz.Actor):
  37class PlannerAgent(Actor):
  38    """
  39    On-demand orchestrator. Spawned per complex task, self-terminates when done.
  40    """
  41
  42    def __init__(
  43        self,
  44        llm_provider:   Optional[LLMProvider] = None,
  45        task:           str = "",
  46        reply_to_id:    str = "",
  47        reply_task_id:  str = "",
  48        auto_terminate: bool = True,
  49        **kwargs,
  50    ):
  51        kwargs.setdefault("name", "planner")
  52        super().__init__(**kwargs)
  53        self.llm              = llm_provider
  54        self._task            = task
  55        self._reply_to_id     = reply_to_id
  56        self._reply_task_id   = reply_task_id
  57        self._auto_terminate  = auto_terminate
  58        self._result_futures: dict[str, asyncio.Future] = {}
  59        self._spawned_by_planner: list[str] = []   # agents we created this run
  60
  61    def _current_task_description(self) -> str:
  62        return self._task[:60] if self._task else "waiting for task"
  63
  64    # ── Lifecycle ──────────────────────────────────────────────────────────
  65
  66    async def on_start(self):
  67        await self._log(f"Planner ready. Task: {self._task[:80]}")
  68        if self._task:
  69            asyncio.create_task(self._report_plan(self._task))
  70
  71    # ── Message handling ───────────────────────────────────────────────────
  72
  73    async def handle_message(self, msg: Message):
  74        if msg.type == MessageType.TASK:
  75            payload   = msg.payload if isinstance(msg.payload, dict) else {"text": str(msg.payload)}
  76            task_text = payload.get("text") or payload.get("task") or str(msg.payload)
  77            self._reply_to_id = payload.get("_reply_to") or msg.reply_to or msg.sender_id or self._reply_to_id
  78            task_id           = payload.get("_task_id")
  79            await self._log(f"Received task: {task_text[:80]}")
  80            result = await self._run_plan(task_text)
  81            if self._reply_to_id:
  82                # Use the initiating task_id (from main) so the future resolves,
  83                # falling back to the message-level task_id if present
  84                resolve_id = self._reply_task_id or task_id
  85                reply = {"result": result, "text": result}
  86                if resolve_id:
  87                    reply["_task_id"] = resolve_id
  88                if self._spawned_by_planner:
  89                    reply["spawned"] = self._spawned_by_planner
  90                await self.send(self._reply_to_id, MessageType.RESULT, reply)
  91
  92        elif msg.type == MessageType.RESULT:
  93            payload = msg.payload if isinstance(msg.payload, dict) else {}
  94            task_id = payload.get("_task_id")
  95            if task_id and task_id in self._result_futures:
  96                fut = self._result_futures[task_id]
  97                if not fut.done():
  98                    fut.set_result(payload)
  99
 100    # ── Report wrapper (on_start path) ────────────────────────────────────
 101
 102    async def _report_plan(self, task: str):
 103        """Run the plan and report the result back to main (used when task set at spawn time)."""
 104        result = await self._run_plan(task)
 105        if self._reply_to_id:
 106            reply = {"result": result, "text": result}
 107            if self._reply_task_id:
 108                reply["_task_id"] = self._reply_task_id
 109            if self._spawned_by_planner:
 110                reply["spawned"] = self._spawned_by_planner
 111            await self.send(self._reply_to_id, MessageType.RESULT, reply)
 112
 113    # ── Core pipeline ──────────────────────────────────────────────────────
 114
 115    # ── Pipeline registry ──────────────────────────────────────────────────
 116    # Each pipeline rule is stored here so users can list / delete them later.
 117    # Stored in persistent state under key "_pipeline_rules".
 118    #
 119    # Schema per rule:
 120    # {
 121    #   "rule_id":    str,       # unique slug
 122    #   "task":       str,       # original user request
 123    #   "agents":     [str],     # names of spawned agents for this rule
 124    #   "created_at": float,
 125    # }
 126
 127    def _load_pipeline_rules(self) -> list[dict]:
 128        return self.recall("_pipeline_rules") or []
 129
 130    def _save_pipeline_rule(self, rule: dict):
 131        rules = self._load_pipeline_rules()
 132        rules = [r for r in rules if r.get("rule_id") != rule["rule_id"]]
 133        rules.append(rule)
 134        self.persist("_pipeline_rules", rules)
 135
 136    # ── Pipeline detection & dispatch ──────────────────────────────────────
 137
 138    def _is_pipeline_request(task: str) -> bool:
 139        """
 140        Detect reactive/persistent pipeline requests vs one-shot tasks.
 141        Pipelines use conditional/temporal language: if/when/whenever/monitor/watch/notify.
 142        Also catches explicit spawn/continuous-agent requests like:
 143          "spawn an agent to log the mean..."
 144          "create an agent that subscribes to..."
 145          "I want an agent to send to a topic random temp..."
 146        """
 147        import re
 148        lowered = task.lower()
 149
 150        # Explicit pipeline prefix always wins
 151        if lowered.startswith("pipeline:") or lowered.startswith("pipeline "):
 152            return True
 153
 154        patterns = [
 155            r"\bif\b.*\bthen\b",
 156            r"\bif\b.*\b(send|notify|alert|turn|open|close|post|message)\b",
 157            r"\bwhen\b.*\b(detect|open|turn|send|notify|alert|is|becomes|goes|changes)\b",
 158            r"\bwhenever\b",
 159            r"\bmonitor\b", r"\bwatch\b",
 160            r"\balert me\b", r"\bnotify me\b",
 161            r"\bsend me\b.*\b(when|if|discord|message|notification)\b",
 162            r"\bsend me a\b",
 163            r"\bautomatically\b",
 164            r"\bevery time\b", r"\bon detection\b",
 165            r"\bis turned on\b", r"\bis turned off\b",
 166            r"\bturns on\b", r"\bturns off\b",
 167            r"\bopens\b.*\b(send|notify|alert|light|turn)\b",
 168            r"\b(door|window|sensor|lamp|light|temperature|humidity|motion)\b.*\b(send|notify|discord|message)\b",
 169            # camera/detect + action = pipeline
 170            r"\b(camera|detect|yolo|webcam)\b.*\b(turn|open|send|notify|alert)\b",
 171            r"\b(person|motion|object)\b.*\bdetect.*\b(turn|open|light|send)\b",
 172            # ── Spawn / continuous agent requests ──
 173            # "spawn an agent to...", "create an agent that...", "I want an agent to..."
 174            r"\b(spawn|create|make|start|run|launch|deploy)\b.*\bagent\b",
 175            r"\b(i\s+want|i\s+need)\b.*\bagent\b.*\b(to|that|which)\b",
 176            # Periodic / continuous language
 177            r"\bevery\s+\d+\s*(sec|min|hour|s\b|m\b|h\b)",
 178            r"\bcontinuously\b", r"\bconstantly\b", r"\bperiodically\b",
 179            r"\bkeep\s+(running|publishing|logging|sending|checking)\b",
 180            r"\b(subscribe|listen)\s+(to|for|on)\b",
 181            r"\blog\s+(the|every|each|all)\b",
 182        ]
 183        return any(re.search(p, lowered) for p in patterns)
 184
 185    async def _run_plan(self, task: str) -> str:
 186        workers = self._discover_workers()
 187        await self._log(f"Workers available: {[w['name'] for w in workers]}")
 188
 189        # ── Prune stale TopicBus contracts ────────────────────────────────
 190        # Remove contracts for agents that are no longer running so the
 191        # planner doesn't wire against dead topics.
 192        try:
 193            from ..core.topic_bus import get_topic_bus
 194            bus = get_topic_bus()
 195            if bus and self._registry:
 196                live = {a.name for a in self._registry.all_actors()}
 197                pruned = bus.registry.prune_stale(live)
 198                if pruned:
 199                    await self._log(f"Pruned {len(pruned)} stale TopicBus contract(s): {pruned}")
 200        except Exception:
 201            pass
 202
 203        # Detect pipeline vs one-shot
 204        is_pipeline = PlannerAgent._is_pipeline_request(task)
 205        if is_pipeline:
 206            await self._log("Pipeline request detected — spawning persistent agents...")
 207            return await self._run_pipeline(task, workers)
 208
 209        # ── 1. Check cache ─────────────────────────────────────────────────
 210        cache_key  = _task_hash(task)
 211        cached     = self._load_cached_plan(cache_key, workers)
 212        if cached:
 213            await self._log(f"Cache hit — reusing plan ({len(cached)} steps)")
 214            plan = cached
 215        else:
 216            await self._log("No cache hit — generating plan with LLM...")
 217            plan = await self._decompose(task, workers)
 218            if not plan:
 219                await self._log("Decomposition failed — answering directly")
 220                return await self._llm_answer(task)
 221
 222        # ── 2. Spawn any missing agents declared in the plan ───────────────
 223        plan = await self._ensure_agents(plan)
 224
 225        # ── 3. Execute ─────────────────────────────────────────────────────
 226        await self._log(f"Executing {len(plan)} step(s)...")
 227        results = await self._execute(plan)
 228
 229        # ── 4. Synthesize ──────────────────────────────────────────────────
 230        answer = await self._synthesize(task, plan, results)
 231
 232        # ── 5. Cache successful plan ───────────────────────────────────────
 233        if not cached:
 234            self._save_plan_cache(cache_key, task, plan)
 235            await self._log("Plan cached for future reuse.")
 236
 237        await self._log("Task complete.")
 238        if self._auto_terminate:
 239            asyncio.create_task(self._deferred_stop())
 240
 241        return answer
 242
 243    # ── Pipeline mode (persistent reactive agents) ─────────────────────────
 244
 245
    async def _run_pipeline(self, task: str, workers: list[dict]) -> str:
        """
        Builds and spawns persistent reactive agents for if/when/whenever rules.

        Flow:
          0. Topic resolution — resolve vague data references to concrete MQTT topics
             using TopicRegistry + HA entity search. Enriches the task with specifics.
          1. _decompose_pipeline queries HomeAssistantAgent for real entity IDs
          2. LLM produces spawn configs (ha_actuator for HA actions, dynamic for everything else)
          3. Each agent is spawned and registered in main's spawn registry
          4. Rule is saved so it can be listed/deleted later
          5. Summary returned to the user

        Multiple rules in one request are fully supported.

        Args:
            task:    The user's pipeline request.
            workers: Worker descriptions passed on to ``_decompose_pipeline``.

        Returns:
            A human-readable summary of the agents that were set up, a
            feasibility error message, or the direct LLM answer when
            decomposition produced no plan.
        """
        # ── Step 0: Topic resolution ───────────────────────────────────────
        # Before planning, resolve vague data references ("temperature", "motion",
        # "energy") to concrete MQTT topics or HA entities. This lets the user
        # say "react to temperature" without knowing the exact topic name.
        task, resolution_note = await self._resolve_data_references(task)
        if resolution_note:
            await self._log(f"Topic resolution: {resolution_note}")

        plan = await self._decompose_pipeline(task, workers)

        if not plan:
            await self._log("Pipeline decomposition failed — falling back to direct answer")
            return await self._llm_answer(task)

        # A single step carrying "_feasibility_error" means the request was rejected
        if len(plan) == 1 and "_feasibility_error" in plan[0]:
            error = plan[0]["_feasibility_error"]
            await self._log(f"Pipeline not feasible: {error}")
            return f"Cannot set up this pipeline:\n\n{error}"

        await self._log(f"Pipeline plan: {len(plan)} agent(s)")
        spawned: list[str] = []      # agents newly created in this call
        wired: list[str] = []        # one summary line per step shown to the user
        rule_agents: list[str] = []  # agents recorded on the rule (new + already running)

        for step in plan:
            name = step.get("name", "").strip()
            description = step.get("description", "")
            spawn_cfg = step.get("spawn_config")

            if not name:
                await self._log("Step missing name — skipping")
                continue

            # An agent of that name already exists: reuse it instead of spawning
            if self._registry and self._registry.find_by_name(name):
                await self._log(f"'{name}' already running — skipping")
                wired.append(f"**{name}** (already active)")
                rule_agents.append(name)
                continue

            if not spawn_cfg:
                await self._log(f"Step '{name}' has no spawn_config — skipping")
                continue

            # Work on a copy so the plan step itself is not modified
            spawn_cfg = dict(spawn_cfg)
            spawn_cfg["name"] = name

            spawn_type = spawn_cfg.get("type", "dynamic")
            await self._log(f"Spawning '{name}' (type={spawn_type})...")
            try:
                actor = await self._spawn_agent(spawn_cfg)
            except Exception as e:
                await self._log(f"Spawn failed for '{name}': {e}")
                wired.append(f"**{name}** — spawn failed: {e}")
                continue

            if actor:
                self._spawned_by_planner.append(name)
                spawned.append(name)
                rule_agents.append(name)

                # Register in main's spawn registry for auto-restore on restart
                if self._registry:
                    main = self._registry.find_by_name("main")
                    if main and hasattr(main, "_save_to_spawn_registry"):
                        registry_cfg = dict(spawn_cfg)
                        registry_cfg["name"] = name
                        registry_cfg["_rule"] = True
                        registry_cfg["_rule_task"] = task[:200]
                        main._save_to_spawn_registry(registry_cfg)

                topics = spawn_cfg.get("mqtt_topics", [])
                label = f"**{name}** — {description}"
                if topics:
                    label += "\n  listens: " + ", ".join(topics)
                wired.append(label)
                # Brief pause between spawns
                await asyncio.sleep(0.3)
            else:
                wired.append(f"**{name}** — failed to spawn")

        # Persist this rule into main's pipeline rules registry
        if rule_agents:
            import hashlib as _hl
            # Rule id: first 8 hex chars of the MD5 of the (enriched) task text
            rule_id = _hl.md5(task.encode()).hexdigest()[:8]
            rule = {
                "rule_id": rule_id,
                "task": task,
                "agents": rule_agents,
                "created_at": time.time(),
            }
            # Save into main so it survives planner self-termination
            if self._registry:
                main = self._registry.find_by_name("main")
                if main and hasattr(main, "save_pipeline_rule"):
                    main.save_pipeline_rule(rule)
                    logger.info(f"[{self.name}] Pipeline rule {rule_id} saved to main")

        # NOTE(review): presumably keeps the planner alive after setting up a
        # pipeline — confirm against where _auto_terminate is read.
        self._auto_terminate = False

        if not wired:
            return "Pipeline plan generated but no agents could be spawned. Check logs."

        out = ["Pipeline active! Here's what I set up:\n"]
        if resolution_note:
            out.insert(0, f"📡 **Data source resolved:** {resolution_note}\n")
        out += [f"{i+1}. {w}" for i, w in enumerate(wired)]
        out.append("\nThese agents run continuously and react to events automatically.")
        out.append("Use `/rules` to see all active pipeline rules.")
        if spawned:
            out.append(f"\nSpawned: {', '.join(spawned)} — will auto-restore on restart.")
        return "\n".join(out)
 371
 372    async def _resolve_data_references(self, task: str) -> tuple[str, str]:
 373        """
 374        Resolve vague data references in a task to concrete MQTT topics or HA entities.
 375
 376        Examples:
 377          "log when temperature > 22"
 378            → finds sensors/test/temperature in TopicRegistry
 379            → enriches: "log when temperature > 22 [subscribe to: sensors/test/temperature]"
 380
 381          "alert when motion detected"
 382            → finds rpi-kitchen/camera/detections in TopicRegistry
 383            → enriches: "alert when motion detected [subscribe to: rpi-kitchen/camera/detections]"
 384
 385          "log when temperature > 22"  (no registered topics)
 386            → falls back to HA entity search
 387            → finds sensor.living_room_temperature
 388            → enriches: "log when temperature > 22 [HA entity: sensor.living_room_temperature]"
 389
 390          "log when temperature > 22"  (ambiguous — multiple sources)
 391            → returns the task unchanged + a note listing candidates
 392            → planner LLM receives the candidates and picks the best one
 393
 394        Returns: (enriched_task, resolution_note)
 395          enriched_task   — task with concrete topic/entity appended as context
 396          resolution_note — human-readable summary of what was found (shown to user)
 397        """
 398        import re
 399
 400        # ── Data concept keywords → search terms ──────────────────────────
 401        # Maps natural language concepts to TopicRegistry search keywords
 402        CONCEPT_MAP = {
 403            r"\btemp(erature)?\b":   ["temperature", "temp", "thermal"],
 404            r"\bhumid(ity)?\b":      ["humidity", "humid"],
 405            r"\bmotion\b":           ["motion", "pir", "presence", "detect"],
 406            r"\bpresence\b":         ["presence", "motion", "occupancy"],
 407            r"\benergy\b":           ["energy", "power", "kwh", "watt"],
 408            r"\bcpu\b":              ["cpu", "processor"],
 409            r"\bmemory\b":           ["memory", "ram"],
 410            r"\bco2\b":              ["co2", "carbon"],
 411            r"\bair quality\b":      ["air", "quality", "voc", "pm25"],
 412            r"\blight level\b":      ["light", "lux", "illumin"],
 413            r"\bnoise\b":            ["noise", "sound", "db"],
 414            r"\bdetect(ion)?\b":     ["detect", "yolo", "camera", "vision"],
 415            r"\bdoor\b":             ["door", "entry", "contact"],
 416            r"\bwindow\b":           ["window", "contact"],
 417            r"\bwater\b":            ["water", "flood", "leak"],
 418            r"\bgas\b":              ["gas", "methane", "smoke"],
 419            r"\bvoltage\b":          ["voltage", "power", "electric"],
 420        }
 421
 422        task_lower = task.lower()
 423
 424        # Find which concepts are mentioned in the task
 425        matched_concepts = []
 426        for pattern, keywords in CONCEPT_MAP.items():
 427            if re.search(pattern, task_lower):
 428                matched_concepts.extend(keywords)
 429
 430        if not matched_concepts:
 431            return task, ""  # No vague data references found
 432
 433        # ── Search TopicRegistry first ─────────────────────────────────────
 434        try:
 435            from ..core.topic_bus import get_topic_bus
 436            bus = get_topic_bus()
 437            if bus:
 438                # Deduplicate and search
 439                seen = set()
 440                candidates = []
 441                for kw in matched_concepts:
 442                    if kw in seen:
 443                        continue
 444                    seen.add(kw)
 445                    for contract in bus.registry.find_by_capability(kw):
 446                        for topic in contract.publishes:
 447                            if not any(c["topic"] == topic for c in candidates):
 448                                candidates.append({
 449                                    "topic":   topic,
 450                                    "agent":   contract.name,
 451                                    "node":    contract.node,
 452                                    "schema":  contract.produces_schema,
 453                                    "source":  "topic_registry",
 454                                })
 455
 456                if len(candidates) == 1:
 457                    # Unambiguous — auto-resolve
 458                    c = candidates[0]
 459                    node_str = f" on {c['node']}" if c.get("node") else ""
 460                    enriched = (
 461                        f"{task} "
 462                        f"[DATA SOURCE: subscribe to MQTT topic '{c['topic']}' "
 463                        f"published by {c['agent']}{node_str}. "
 464                        f"Use agent.subscribe('{c['topic']}', callback) in setup().]"
 465                    )
 466                    note = (
 467                        f"Found `{c['topic']}` from **{c['agent']}**{node_str} "
 468                        f"— using this as the data source."
 469                    )
 470                    return enriched, note
 471
 472                if len(candidates) > 1:
 473                    # Multiple matches — give all to LLM, let it pick best
 474                    sources = ", ".join(
 475                        f"'{c['topic']}' ({c['agent']})" for c in candidates[:5]
 476                    )
 477                    enriched = (
 478                        f"{task} "
 479                        f"[MULTIPLE DATA SOURCES FOUND: {sources}. "
 480                        f"Pick the most relevant topic based on the user's intent. "
 481                        f"Use agent.subscribe(chosen_topic, callback) in setup().]"
 482                    )
 483                    note = (
 484                        f"Found {len(candidates)} matching topics: "
 485                        + ", ".join(f"`{c['topic']}`" for c in candidates[:3])
 486                        + (" and more" if len(candidates) > 3 else "")
 487                        + " — planner will pick the most relevant."
 488                    )
 489                    return enriched, note
 490
 491        except Exception as e:
 492            logger.debug(f"[{self.name}] TopicRegistry search failed: {e}")
 493
 494        # ── Fallback: search HA entities ───────────────────────────────────
 495        # No registered agent topics found — check if HA has relevant sensors
 496        try:
 497            if self._registry:
 498                ha_agent = self._registry.find_by_name("home-assistant-agent")
 499                if ha_agent:
 500                    import uuid as _uuid
 501                    task_id = f"resolve_{_uuid.uuid4().hex[:6]}"
 502                    future = asyncio.get_running_loop().create_future()
 503                    self._result_futures[task_id] = future
 504                    await self.send(ha_agent.actor_id, MessageType.TASK, {
 505                        "text":     "list entities",
 506                        "_task_id": task_id,
 507                        "task":     task_id,
 508                    })
 509                    try:
 510                        result = await asyncio.wait_for(future, timeout=8.0)
 511                        # home-assistant-agent returns {"entities": [...]} — a flat list
 512                        # of entity dicts with entity_id, name, state, etc.
 513                        # NOT a nested devices→entities structure.
 514                        entities_raw = (
 515                            result.get("entities", [])
 516                            or result.get("result", [])
 517                            or result.get("devices", [])  # legacy fallback
 518                        )
 519                        if isinstance(entities_raw, str):
 520                            entities_raw = []
 521                    except (asyncio.TimeoutError, Exception):
 522                        entities_raw = []
 523                    finally:
 524                        self._result_futures.pop(task_id, None)
 525
 526                    # Search entity list for relevant matches
 527                    ha_candidates = []
 528                    for entity in entities_raw:
 529                        if not isinstance(entity, dict):
 530                            continue
 531                        # Handle both flat entity format and nested device format
 532                        if "entity_id" in entity:
 533                            # Flat format: {"entity_id": "sensor.temp", "name": "..."}
 534                            eid   = entity.get("entity_id", "")
 535                            ename = entity.get("friendly_name", "") or entity.get("name", "")
 536                            state = entity.get("state", "")
 537                            combined = (eid + " " + ename).lower()
 538                            if any(kw in combined for kw in matched_concepts):
 539                                ha_candidates.append({
 540                                    "entity_id": eid,
 541                                    "name":      ename,
 542                                    "state":     state,
 543                                    "source":    "home_assistant",
 544                                })
 545                        elif "entities" in entity:
 546                            # Nested device format (legacy): {"entities": [...]}
 547                            for sub in entity.get("entities", []):
 548                                eid   = sub.get("entity_id", "")
 549                                ename = sub.get("friendly_name", "") or sub.get("name", "")
 550                                state = sub.get("state", "")
 551                                combined = (eid + " " + ename).lower()
 552                                if any(kw in combined for kw in matched_concepts):
 553                                    ha_candidates.append({
 554                                        "entity_id": eid,
 555                                        "name":      ename,
 556                                        "state":     state,
 557                                        "source":    "home_assistant",
 558                                    })
 559
 560                    if len(ha_candidates) == 1:
 561                        c = ha_candidates[0]
 562                        enriched = (
 563                            f"{task} "
 564                            f"[DATA SOURCE: Home Assistant entity '{c['entity_id']}' "
 565                            f"(name: {c['name']}, current state: {c['state']}). "
 566                            f"Subscribe to homeassistant/state_changes/# and filter "
 567                            f"by payload.get('entity_id') == '{c['entity_id']}'. "
 568                            f"The value is in payload.get('new_state', {{}}).get('state').]"
 569                        )
 570                        note = (
 571                            f"No MQTT topic found — using HA entity "
 572                            f"**{c['name']}** (`{c['entity_id']}`, currently: {c['state']})."
 573                        )
 574                        return enriched, note
 575
 576                    if len(ha_candidates) > 1:
 577                        sources = ", ".join(
 578                            f"'{c['entity_id']}' ({c['name']})"
 579                            for c in ha_candidates[:4]
 580                        )
 581                        enriched = (
 582                            f"{task} "
 583                            f"[MULTIPLE HA ENTITIES FOUND: {sources}. "
 584                            f"Pick the most relevant. Subscribe to homeassistant/state_changes/# "
 585                            f"and filter by entity_id in the payload.]"
 586                        )
 587                        note = (
 588                            f"No MQTT topic found — found {len(ha_candidates)} HA entities: "
 589                            + ", ".join(f"`{c['entity_id']}`" for c in ha_candidates[:3])
 590                            + (" and more" if len(ha_candidates) > 3 else "")
 591                            + " — planner will pick the most relevant."
 592                        )
 593                        return enriched, note
 594
 595        except Exception as e:
 596            logger.debug(f"[{self.name}] HA entity search failed: {e}")
 597
 598        # ── Nothing found — return task unchanged with a note ──────────────
 599        concepts_str = ", ".join(set(matched_concepts[:4]))
 600        enriched = (
 601            f"{task} "
 602            f"[NOTE: No registered MQTT topics or HA entities found matching: {concepts_str}. "
 603            f"If the user has a sensor agent running, it may not have published yet. "
 604            f"Ask the user to specify the exact MQTT topic or HA entity ID, "
 605            f"or check agent.topics() for available data streams.]"
 606        )
 607        note = (
 608            f"No data source found for: {concepts_str}. "
 609            f"You may need to specify the exact topic or entity."
 610        )
 611        return enriched, note
 612
 613    async def _sample_live_topics(self, bus) -> list[str]:
 614        """
 615        Peek at one live MQTT message from each registered publish topic.
 616        Returns formatted lines with actual field names and an example value.
 617
 618        This is the fallback when observed_samples haven't been captured yet
 619        (e.g. the producer started before the schema-capture code was deployed).
 620
 621        Uses a single MQTT connection with a short per-topic timeout so it
 622        doesn't block planning. Topics that don't publish within the window
 623        are silently skipped.
 624        """
 625        import json as _json
 626
 627        try:
 628            import aiomqtt
 629        except ImportError:
 630            return []
 631
 632        sample_lines = []
 633        topics_to_sample: list[tuple[str, str]] = []  # (topic, agent_name)
 634
 635        for contract in bus.registry.all_contracts():
 636            for topic in (contract.publishes or [])[:5]:
 637                if not any(t == topic for t, _ in topics_to_sample):
 638                    topics_to_sample.append((topic, contract.name))
 639            if len(topics_to_sample) >= 10:
 640                break
 641
 642        if not topics_to_sample:
 643            return []
 644
 645        broker = getattr(self, "_mqtt_broker", "localhost")
 646        port   = getattr(self, "_mqtt_port", 1883)
 647
 648        # Subscribe to ALL topics on one connection, collect first message per topic
 649        # with a global timeout so we never hang.
 650        received: dict[str, dict] = {}   # topic → payload
 651
 652        async def _collect():
 653            try:
 654                async with aiomqtt.Client(broker, port) as client:
 655                    for topic, _ in topics_to_sample:
 656                        await client.subscribe(topic)
 657                    async for msg in client.messages:
 658                        t = str(msg.topic)
 659                        if t not in received:
 660                            try:
 661                                payload = _json.loads(msg.payload.decode())
 662                            except Exception:
 663                                payload = msg.payload.decode()
 664                            if isinstance(payload, dict):
 665                                received[t] = payload
 666                        # Stop once we have a sample for every topic
 667                        if len(received) >= len(topics_to_sample):
 668                            return
 669            except Exception as e:
 670                logger.debug(f"[{self.name}] _sample_live_topics connection error: {e}")
 671
 672        # Wait at most N seconds total (not per-topic) — covers the common case
 673        # where producers publish every few seconds.  Stale topics just get skipped.
 674        max_wait = min(15.0, 5.0 + 2.0 * len(topics_to_sample))
 675        try:
 676            await asyncio.wait_for(_collect(), timeout=max_wait)
 677        except asyncio.TimeoutError:
 678            pass  # we'll use whatever we collected so far
 679
 680        # Build sample lines and store back into contracts
 681        topic_to_agent = {t: a for t, a in topics_to_sample}
 682        for topic, payload in received.items():
 683            agent_name = topic_to_agent.get(topic, "?")
 684            fields = {
 685                k: type(v).__name__
 686                for k, v in payload.items()
 687                if not k.startswith("_")
 688            }
 689            # Persist into contract for future calls (no repeated sampling)
 690            for contract in bus.registry.all_contracts():
 691                if topic in (contract.publishes or []):
 692                    contract.update_observed(topic, payload)
 693                    break
 694            sample_lines.append(
 695                f"  Topic: {topic}  (published by {agent_name})\n"
 696                f"    Fields: {fields}\n"
 697                f"    Example payload: {payload}"
 698            )
 699
 700        if sample_lines:
 701            logger.info(
 702                f"[{self.name}] Sampled {len(sample_lines)} live topic(s) for schema introspection"
 703            )
 704        return sample_lines
 705
 706    async def _decompose_pipeline(self, task: str, workers: list[dict]) -> list[dict]:
 707        """
 708        Decomposes a reactive pipeline request into persistent agent spawn configs.
 709
 710        Flow:
 711          1. Query HomeAssistantAgent for live entities (delegates — no duplication)
 712          2. Feasibility check — surface clear error if required HA entities are missing
 713          3. LLM produces spawn configs with real entity IDs and correct MQTT wiring
 714        """
 715        if not self.llm:
 716            return []
 717
 718        # ── 1. Get HA entities via HomeAssistantAgent ──────────────────────
 719        ha_entities_text = ""
 720        ha_available = False
 721
 722        try:
 723            if self._registry and self._registry.find_by_name("home-assistant-agent"):
 724                result = await self._delegate("home-assistant-agent", "list_entities")
 725                if result and not result.get("error"):
 726                    entities_list = result.get("entities", [])
 727                    if entities_list:
 728                        lines = []
 729                        for e in entities_list[:200]:
 730                            eid = e.get("entity_id", "")
 731                            ename = e.get("name", "")
 732                            plat = e.get("platform", "")
 733                            if eid:
 734                                parts = [eid]
 735                                if ename and ename != eid:
 736                                    parts.append(f"name={ename}")
 737                                if plat:
 738                                    parts.append(f"platform={plat}")
 739                                lines.append("  " + "  ".join(parts))
 740                        ha_entities_text = "\n".join(lines)
 741                        ha_available = True
 742                        logger.info(f"[{self.name}] Got {len(entities_list)} HA entities via home-assistant-agent")
 743        except Exception as e:
 744            logger.warning(f"[{self.name}] Could not query home-assistant-agent: {e}")
 745
 746        # Fallback: fetch directly if HA agent is unavailable
 747        if not ha_available:
 748            try:
 749                from ..config import CONFIG
 750                from ..core.integrations.home_assistant.ha_helper import fetch_devices_entities_with_location
 751                ha_url = (CONFIG.ha_url or "").rstrip("/")
 752                ha_token = (CONFIG.ha_token or "").strip()
 753                if ha_url and ha_token:
 754                    devices = await fetch_devices_entities_with_location(ha_url, ha_token, include_states=True)
 755                    lines = []
 756                    for device in devices[:150]:
 757                        area = device.get("area", "")
 758                        for entity in device.get("entities", []):
 759                            eid = entity.get("entity_id", "")
 760                            ename = entity.get("friendly_name") or entity.get("name", "")
 761                            state = entity.get("state", "")
 762                            if eid:
 763                                parts = [eid]
 764                                if ename: parts.append(f"name={ename}")
 765                                if area: parts.append(f"area={area}")
 766                                if state: parts.append(f"state={state}")
 767                                lines.append("  " + "  ".join(parts))
 768                    ha_entities_text = "\n".join(lines)
 769                    ha_available = bool(lines)
 770                    logger.info(f"[{self.name}] Direct HA fetch: {len(lines)} entities")
 771            except Exception as e:
 772                logger.warning(f"[{self.name}] Direct HA fetch failed: {e}")
 773
 774        ha_section = ha_entities_text if ha_entities_text else \
 775            "  (HA not reachable — use entity IDs provided by the user)"
 776
 777        # ── Fetch TopicBus context (live data flows + wiring opportunities) ─
 778        topic_bus_section = ""
 779        topic_samples_section = ""
 780        try:
 781            from ..core.topic_bus import get_topic_bus
 782            bus = get_topic_bus()
 783            if bus and bus.registry.all_contracts():
 784                topic_bus_section = bus.to_planner_context()
 785                logger.info(f"[{self.name}] TopicBus: {len(bus.registry.all_contracts())} contracts")
 786
 787                # ── Sample live payloads from registered topics ────────────
 788                # Captures ACTUAL field names so the LLM uses "temp" not "temperature"
 789                sample_lines = []
 790                for contract in bus.registry.all_contracts():
 791                    samples = contract.observed_samples or {}
 792                    if samples:
 793                        for topic, info in samples.items():
 794                            example = info.get("example", {})
 795                            fields  = info.get("fields", {})
 796                            sample_lines.append(
 797                                f"  Topic: {topic}  (published by {contract.name})\n"
 798                                f"    Fields: {fields}\n"
 799                                f"    Example payload: {example}"
 800                            )
 801
 802                # If no observed_samples yet, try to peek at one live message
 803                # from each published topic via MQTT (fast — 3s timeout each)
 804                if not sample_lines:
 805                    sample_lines = await self._sample_live_topics(bus)
 806
 807                if sample_lines:
 808                    topic_samples_section = (
 809                        "LIVE TOPIC SAMPLES (actual payloads — use THESE field names in code):\n"
 810                        + "\n".join(sample_lines)
 811                    )
 812
 813            else:
 814                topic_bus_section = (
 815                    "No topic contracts registered yet.\n"
 816                    "Agents can declare contracts via agent.declare_contract() in setup().\n"
 817                    "Once declared, the planner can wire agents automatically by topic compatibility."
 818                )
 819        except Exception as e:
 820            topic_bus_section = f"TopicBus unavailable: {e}"
 821
 822        # ── Fetch stored notification URLs from main ──────────────────────
 823        notification_urls: dict = {}
 824        if self._registry:
 825            main = self._registry.find_by_name("main")
 826            if main and hasattr(main, "get_notification_urls"):
 827                notification_urls = main.get_notification_urls()
 828
 829        # Also extract any URL directly mentioned in the task
 830        import re as _re
 831        _url_match = _re.search(
 832            r'https?://(?:discord\.com/api/webhooks|hooks\.slack\.com|api\.telegram\.org)/\S+',
 833            task
 834        )
 835        if _url_match:
 836            url = _url_match.group(0).rstrip(".,;!)'\"")
 837            if "discord" in url:
 838                notification_urls["discord"] = url
 839            elif "slack" in url:
 840                notification_urls["slack"] = url
 841            elif "telegram" in url:
 842                notification_urls["telegram"] = url
 843
 844        notif_section = ""
 845        if notification_urls:
 846            lines = ["NOTIFICATION URLS (use these directly in code — do not use placeholders):"]
 847            for svc, url in notification_urls.items():
 848                lines.append(f"  {svc}: {url}")
 849            notif_section = "\n".join(lines)
 850        else:
 851            notif_section = (
 852                "NOTIFICATION URLS: none stored.\n"
 853                "If the user wants Discord/Slack/Telegram notifications and no URL is available,\n"
 854                "use a placeholder 'WEBHOOK_URL_REQUIRED' and set description to explain the user must run:\n"
 855                "  /webhook discord <url>"
 856            )
 857        _local_kw = ("camera", "webcam", "laptop", "detect", "yolo", "person",
 858                     "object detection", "cv2", "opencv",
 859                     "discord", "telegram", "slack", "notify", "notification", "message")
 860        _skip_feasibility = any(kw in task.lower() for kw in _local_kw)
 861
 862        if ha_available and ha_entities_text and not _skip_feasibility:
 863            feas_prompt = (
 864                "Check if this reactive automation can be fulfilled with available HA entities.\n\n"
 865                f"USER REQUEST: {task}\n\n"
 866                f"AVAILABLE HA ENTITIES:\n{ha_section}\n\n"
 867                'Return JSON only:\n'
 868                '{"feasible": true/false, "reason": "<one sentence if not feasible>", "relevant_entities": ["entity_id", ...]}\n\n'
 869                "Rules:\n"
 870                "- feasible=true only if ALL required entity types exist\n"
 871                "- Camera/webcam/Discord/notification requests: always feasible=true"
 872            )
 873            try:
 874                feas_resp, _ = await self.llm.complete(
 875                    messages=[{"role": "user", "content": feas_prompt}],
 876                    system="Output only valid JSON. No markdown.",
 877                    max_tokens=400,
 878                )
 879                clean = feas_resp.strip()
 880                for fence in ("```json", "```"):
 881                    if clean.startswith(fence):
 882                        clean = clean[len(fence):]
 883                    if clean.endswith("```"):
 884                        clean = clean[:-3]
 885                clean = clean.strip()
 886                feas = json.loads(clean)
 887                if not feas.get("feasible", True):
 888                    reason = feas.get("reason", "Cannot fulfill request with available HA entities.")
 889                    logger.warning(f"[{self.name}] Feasibility failed: {reason}")
 890                    return [{"_feasibility_error": reason}]
 891                logger.info(f"[{self.name}] Feasibility OK — relevant: {feas.get('relevant_entities', [])}")
 892            except Exception as e:
 893                logger.warning(f"[{self.name}] Feasibility check error (continuing): {e}")
 894
 895        # ── 3. Decompose into spawn configs ────────────────────────────────
 896        # Build the prompt as a list of parts to avoid f-string escape issues
 897        prompt_parts = [
 898            "You are designing reactive automation pipelines for a multi-agent IoT system.",
 899            "Output ONLY a valid JSON array — no explanation, no markdown, no code fences.",
 900            "",
 901            "═══ SYSTEM ARCHITECTURE ═══",
 902            "",
 903            "HomeAssistantStateBridgeAgent (ALWAYS running, NEVER spawn again):",
 904            "  Publishes every HA state change to MQTT.",
 905            "  Topic format depends on HA_STATE_BRIDGE_PER_ENTITY config — can be either:",
 906            "    Flat:       homeassistant/state_changes                          (all entities, one topic)",
 907            "    Per-entity: homeassistant/state_changes/{domain}/{full_entity_id} (one topic per entity)",
 908            "  ALWAYS subscribe to the wildcard: homeassistant/state_changes/#",
 909            "  This catches BOTH formats and never breaks regardless of config.",
 910            '  Payload always contains: {"entity_id": "light.wiz_...", "domain": "light", "new_state": {"state": "on", ...}, "old_state": {...}}',
 911            "  Filter by entity_id IN THE PAYLOAD — never rely on the topic path for filtering.",
 912            "  NOTE: 'state' is NESTED inside new_state — check payload['new_state']['state'].",
 913            "",
 914            "═══ AGENT TYPES ═══",
 915            "",
 916            'TYPE 1 — "ha_actuator"',
 917            "  Purpose: call any Home Assistant service (turn_on, turn_off, set_temperature, open_cover, etc.)",
 918            "  No code needed. Subscribes to an MQTT trigger topic and calls the HA service.",
 919            "  detection_filter matches TOP-LEVEL keys of the incoming payload only.",
 920            "  spawn_config schema:",
 921            '    "type": "ha_actuator"',
 922            '    "automation_id": "<unique-kebab-id>"',
 923            '    "description": "<what this does>"',
 924            '    "mqtt_topics": ["<trigger-topic>"]',
 925            '    "actions": [{"domain": "<ha-domain>", "service": "<ha-service>", "entity_id": "<entity_id-from-list>", "service_data": {}}]',
 926            '    "conditions": []',
 927            '    "detection_filter": {"<top-level-key>": <value>} or null',
 928            '    "cooldown_seconds": <number>',
 929            "",
 930            'TYPE 2 — "dynamic"',
 931            "  Purpose: any logic that needs code — state filtering, webcam, timers, HTTP webhooks, Discord, etc.",
 932            "  Define these async functions (all optional except at least one must exist):",
 933            "    async def setup(agent)   — runs once on start, good for subscriptions and init",
 934            "    async def process(agent) — runs in a loop every poll_interval seconds",
 935            "  Available APIs (ONLY these — no other agent methods exist):",
 936            '    await agent.log("message")                        — structured log (ASYNC, must await)',
 937            '    await agent.publish("topic", {dict})              — publish to MQTT (ASYNC, must await)',
 938            '    await agent.alert("message")                      — trigger alert (ASYNC, must await)',
 939            '    await agent.send_to("name", payload)              — delegate to agent (ASYNC, must await)',
 940            '    await agent.mqtt_get("topic")                     — one-shot MQTT read (ASYNC, must await)',
 941            '    agent.subscribe("topic", async_callback)          — subscribe to MQTT (SYNC, NO await!)',
 942            '                                                        callback(payload_dict) per message',
 943            '                                                        runs as background task, setup() returns immediately',
 944            '    agent.window("topic", seconds=N)                  — sliding window (SYNC, NO await!)',
 945            '    agent.recall("key")                               — load persisted value (SYNC, NO await!)',
 946            '    agent.persist("key", value)                       — save persisted value (SYNC, NO await!)',
 947            '    agent.declare_contract(...)                        — register topic contract (SYNC, NO await!)',
 948            '    agent.state["key"]                                — in-memory dict (cleared on restart)',
 949            "  CRITICAL RULES FOR DYNAMIC AGENT CODE:",
 950            "    NEVER use await on agent.subscribe(), agent.window(), agent.persist(), agent.recall(), agent.declare_contract()",
 951            "    NEVER import or use aiomqtt directly — use agent.subscribe() instead",
 952            "    NEVER hardcode MQTT broker hostnames or ports — agent.subscribe() handles this automatically",
 953            "    NEVER use asyncio.create_task() for MQTT — agent.subscribe() already creates the background task",
 954            "    agent.subscribe() is non-blocking — call it in setup() and return immediately",
 955            "  spawn_config schema:",
 956            '    "type": "dynamic"',
 957            '    "description": "<what this does>"',
 958            '    "install": ["<pip-package>", ...]       — packages to install before running',
 959            '    "poll_interval": <seconds>              — how often process(agent) runs',
 960            '    "code": "<full python source as single string with \\n for newlines>"',
 961            "",
 962            "═══ CANONICAL WIRING PATTERNS ═══",
 963            "",
 964            "PATTERN 1 — HA sensor triggers HA action (door → light, motion → switch, temp → AC):",
 965            "  Problem: HA state is nested in new_state.state, ha_actuator can only filter top-level keys.",
 966            "  Solution: use a dynamic filter agent to extract and re-publish the trigger.",
 967            "  Agent 1 (dynamic, name: '<slug>-state-filter'):",
 968            "    setup(agent): use agent.subscribe() to listen to homeassistant/state_changes/{domain}/{entity_id}",
 969            "      Check new_state['state'] against condition, if met: await agent.publish('custom/triggers/<slug>', {'triggered': True})",
 970            "    agent.subscribe() runs as a background task — setup() must return immediately after calling it.",
 971            "  Agent 2 (ha_actuator, name: '<slug>-actuator'):",
 972            "    mqtt_topics: ['custom/triggers/<slug>']",
 973            "    detection_filter: {'triggered': True}",
 974            "    actions: [the HA service call with the correct entity_id]",
 975            "  CONDITION EXAMPLES:",
 976            "    Binary sensor (door/window/motion): new_state['state'] == 'on'",
 977            "    Numeric sensor (temperature/humidity): float(new_state.get('state', 0)) > threshold",
 978            "    Switch/light: new_state['state'] == 'on' or 'off'",
 979            "  PATTERN 1 CODE TEMPLATE:",
 980            "    async def setup(agent):",
 981            "        async def on_state(payload):",
 982            "            if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return",
 983            "            state = payload.get('new_state', {}).get('state', '')",
 984            "            if state == 'on':  # adapt condition to user request",
 985            "                await agent.publish('custom/triggers/<slug>', {'triggered': True, 'state': state})",
 986            "        # Use wildcard — works regardless of per-entity or flat topic config",
 987            "        agent.subscribe('homeassistant/state_changes/#', on_state)",
 988            "",
 989            "PATTERN 2 — HA sensor triggers notification (Discord, Slack, HTTP webhook):",
 990            "  ONE dynamic agent using agent.subscribe():",
 991            "    async def setup(agent):",
 992            "        async def on_state(payload):",
 993            "            if payload.get('entity_id') != 'light.wiz_rgbw_tunable_02cba0': return",
 994            "            state = payload.get('new_state', {}).get('state', '')",
 995            "            if state == 'on':  # adapt condition",
 996            "                import httpx",
 997            "                async with httpx.AsyncClient() as c:",
 998            "                    await c.post('<WEBHOOK_URL>', json={'content': 'Lamp turned on!'})",
 999            "                await agent.log('Discord notification sent')",
1000            "        # Use wildcard — works regardless of per-entity or flat topic config",
1001            "        agent.subscribe('homeassistant/state_changes/#', on_state)",
1002            "  Install: httpx",
1003            "  IMPORTANT: use the exact webhook URL from NOTIFICATION URLS section below.",
1004            "",
1005            "PATTERN 3 — Webcam/camera object detection triggers HA action:",
1006            "  Agent 1 (dynamic, name: '<slug>-camera-detect'):",
1007            "    setup(agent): load YOLO model and open camera",
1008            "    process(agent): capture frame, run inference, determine if target object is detected,",
1009            "      publish {'detected': bool, 'target': '<object-name>', 'objects': [list-of-all-detected]}",
1010            "      to custom/detections/<slug>",
1011            "    Install: ultralytics, opencv-python",
1012            "    poll_interval: 1",
1013            "  Agent 2 (ha_actuator, name: '<slug>-actuator'):",
1014            "    mqtt_topics: ['custom/detections/<slug>']",
1015            "    detection_filter: {'detected': True}",
1016            "    actions: [HA service call]",
1017            "  IMPORTANT: publish {'detected': bool} not {'person_detected': bool} — generic for any object.",
1018            "  In code: target = '<object-name-from-user-request>'; detected = target in set(detected_labels)",
1019            "",
1020            "PATTERN 4 — Webcam detection triggers notification:",
1021            "  Agent 1: same as Pattern 3 agent 1",
1022            "  Agent 2 (dynamic, name: '<slug>-notify'):",
1023            "    setup(agent): use agent.subscribe() on custom/detections/<slug>",
1024            "      When detected=True: POST notification via httpx",
1025            "",
1026            "PATTERN 5 — Timer/schedule triggers HA action:",
1027            "  Agent 1 (dynamic, name: '<slug>-timer'):",
1028            "    process(agent): check current time (import datetime), if matches schedule:",
1029            "      await agent.publish('custom/triggers/<slug>', {'triggered': True})",
1030            "    poll_interval: 60",
1031            "  Agent 2 (ha_actuator): subscribes to custom/triggers/<slug>",
1032            "",
1033            "PATTERN 6 — MQTT sensor data + condition → HA action (e.g. 'if temp > 20 turn off lamp'):",
1034            "  This combines multiple data sources and triggers an HA action. NEVER use httpx for HA!",
1035            "  Agent 1 (dynamic, name: '<slug>-monitor'):",
1036            "    setup(agent): subscribe to relevant MQTT topics using agent.subscribe()",
1037            "      In callback: check conditions, if met → await agent.publish('custom/triggers/<slug>', {'triggered': True})",
1038            "    Example: subscribe to sensor topic AND HA state topic, check both conditions",
1039            "  Agent 2 (ha_actuator, name: '<slug>-actuator'):",
1040            "    mqtt_topics: ['custom/triggers/<slug>']",
1041            "    detection_filter: {'triggered': True}",
1042            "    actions: [{'domain': 'light', 'service': 'turn_off', 'entity_id': 'light.xxx'}]",
1043            "  PATTERN 6 CODE TEMPLATE:",
1044            "    async def setup(agent):",
1045            "        agent.state['lamp_on'] = False",
1046            "        agent.state['temp'] = 0",
1047            "        async def on_temp(payload):",
1048            "            agent.state['temp'] = payload.get('temp', 0)  # use EXACT field name from OBSERVED samples",
1049            "            await check_and_trigger()",
1050            "        async def on_lamp(payload):",
1051            "            agent.state['lamp_on'] = payload.get('state') == 'on'",
1052            "            await check_and_trigger()",
1053            "        async def check_and_trigger():",
1054            "            if agent.state['lamp_on'] and agent.state['temp'] > 20:",
1055            "                await agent.publish('custom/triggers/lamp-temp', {'triggered': True})",
1056            "                await agent.log('Condition met! Trigger published.')",
1057            "        agent.subscribe('custom/sensors/temp_humidity', on_temp)",
1058            "        agent.subscribe('lamp/status', on_lamp)",
1059            "",
1060            "═══ GENERAL RULES ═══",
1061            "",
1062            "╔══════════════════════════════════════════════════════════════════╗",
1063            "║  CRITICAL — HOME ASSISTANT ACTIONS                              ║",
1064            "║  NEVER call HA REST API directly from dynamic agent code!       ║",
1065            "║  NEVER use httpx/requests to POST to /api/services/*.           ║",
1066            "║  ALWAYS use an ha_actuator agent for ANY HA service call.       ║",
1067            "║                                                                 ║",
1068            "║  CORRECT: dynamic agent publishes trigger → ha_actuator acts    ║",
1069            "║  WRONG:   dynamic agent calls httpx.post('http://ha/api/...')   ║",
1070            "╚══════════════════════════════════════════════════════════════════╝",
1071            "",
1072            "  If a dynamic agent needs to turn on/off a light, switch, or any HA device:",
1073            "    1. The dynamic agent publishes a trigger: await agent.publish('custom/triggers/<slug>', {'triggered': True})",
1074            "    2. A SEPARATE ha_actuator agent subscribes to that trigger and executes the HA service call",
1075            "  This is Patterns 1 and 5 — ALWAYS follow this two-agent pattern for HA actions.",
1076            "",
1077            "- Use EXACT entity_id values from the HA entities list — never invent entity IDs",
1078            "- For HA service calls (in ha_actuator config, NOT in dynamic agent code):",
1079            "  light → light.turn_on / light.turn_off",
1080            "  switch → switch.turn_on / switch.turn_off",
1081            "  climate → climate.set_temperature / climate.set_hvac_mode",
1082            "  cover → cover.open_cover / cover.close_cover",
1083            "  script → script.turn_on",
1084            "- Multiple rules in one request → output ALL agents for ALL rules",
1085            "- Each agent does exactly ONE job — keep it minimal",
1086            "- Replace <slug> consistently across paired agents with a short descriptive kebab-case id",
1087            "- ALWAYS subscribe to homeassistant/state_changes/# (wildcard) — NEVER to a specific sub-topic",
1088            "  Filter by entity_id in the payload: if payload.get('entity_id') != 'light.xyz': return",
1089            "  This works regardless of whether HA_STATE_BRIDGE_PER_ENTITY is on or off",
1090            "- If user provides a Discord webhook URL, use it directly in code",
1091            "- If user provides a condition threshold (e.g. 'above 28 degrees'), encode it in the filter agent code",
1092            "- Dynamic agent code must be a single string with actual \\n newlines (not literal backslash-n)",
1093            "- TOPIC-BASED WIRING: if LIVE DATA FLOWS shows an agent already publishing relevant data,",
1094            "  subscribe to that topic instead of spawning a duplicate agent.",
1095            "  Example: if 'person-detector' publishes 'rpi-kitchen/camera/detections',",
1096            "  a notification agent should subscribe to that topic, not spawn its own camera agent.",
1097            "- Use agent.declare_contract() in setup() to declare what topics an agent publishes/subscribes.",
1098            "  This makes the agent discoverable for future auto-wiring.",
1099            "- Use agent.window(topic, seconds=N) for temporal reasoning:",
1100            "  'if motion detected 3+ times in 5 minutes' → agent.window('motion/events', seconds=300).event_count() >= 3",
1101            "- Use agent.read_world_state(topic) to read retained shared state without subscribing.",
1102            "- Use agent.publish_world_state(key, data) to share state that other agents can read.",
1103            "",
1104            "═══ LIVE DATA FLOWS (topic contracts) ═══",
1105            topic_bus_section,
1106            "",
1107            *(  # Include live topic samples if available
1108                [
1109                    "═══ LIVE TOPIC SAMPLES (use EXACTLY these field names in code!) ═══",
1110                    topic_samples_section,
1111                    "",
1112                    "CRITICAL: When subscribing to a topic listed above, use the EXACT field names",
1113                    "from the sample payload. For example if the sample shows {'temp': 30.5},",
1114                    "use payload['temp'] — NOT payload['temperature']. The field names in the",
1115                    "samples are authoritative.",
1116                    "",
1117                ]
1118                if topic_samples_section else []
1119            ),
1120            "═══ HOME ASSISTANT ENTITIES ═══",
1121            ha_section,
1122            "",
1123            "═══ NOTIFICATION URLS ═══",
1124            notif_section,
1125            "",
1126            "═══ OUTPUT FORMAT ═══",
1127            "JSON array. Each element:",
1128            '{"name": "<unique-kebab-name>", "description": "<one sentence>", "spawn_config": {<full spawn_config>}}',
1129            "",
1130            "═══ USER REQUEST ═══",
1131            task,
1132        ]
1133        prompt = "\n".join(prompt_parts)
1134
1135        try:
1136            response, _ = await self.llm.complete(
1137                messages=[{"role": "user", "content": prompt}],
1138                system="You are a JSON-only pipeline architect. Output only a valid JSON array. No markdown, no explanation.",
1139                max_tokens=4000,
1140            )
1141            clean = response.strip()
1142            if clean.startswith("```"):
1143                clean = "\n".join(clean.split("\n")[1:])
1144            if "```" in clean:
1145                clean = clean[:clean.rfind("```")]
1146            start = clean.find("[")
1147            end = clean.rfind("]")
1148            if start != -1 and end != -1:
1149                clean = clean[start:end + 1]
1150            plan = json.loads(clean.strip())
1151            if isinstance(plan, list):
1152                # Validate generated code — catch common LLM mistakes
1153                plan = self._validate_pipeline_code(plan)
1154                logger.info(f"[{self.name}] Pipeline plan: {len(plan)} step(s)")
1155                for i, step in enumerate(plan):
1156                    sc = step.get("spawn_config", {})
1157                    logger.info(
1158                        f"[{self.name}]   step {i + 1}: name={step.get('name')}  "
1159                        f"type={sc.get('type')}  topics={sc.get('mqtt_topics', [])}"
1160                    )
1161                return plan
1162        except Exception as e:
1163            logger.error(f"[{self.name}] Pipeline decomposition error: {e}")
1164        return []
1165
1166    # ── Pipeline code validator ────────────────────────────────────────────
1167
1168    def _validate_pipeline_code(self, plan: list[dict]) -> list[dict]:
1169        """
1170        Scan generated dynamic agent code for common LLM mistakes and fix them.
1171        Currently catches:
1172          - Raw aiomqtt.Client() usage (should use agent.subscribe() instead)
1173          - Hardcoded MQTT broker hostnames
1174          - `await` on synchronous agent API methods (subscribe, window, persist, etc.)
1175        Logs warnings so the user knows what was fixed.
1176        """
1177        import re as _re
1178
1179        # Synchronous agent API methods that must NOT be awaited
1180        _SYNC_METHODS = (
1181            "subscribe", "window", "persist", "recall",
1182            "declare_contract", "agents", "nodes", "topics",
1183            "capabilities", "increment_processed", "increment_errors",
1184        )
1185        _sync_pat = r"\bawait\s+(agent\.(?:" + "|".join(_SYNC_METHODS) + r")\s*\()"
1186
1187        for step in plan:
1188            sc = step.get("spawn_config", {})
1189            if sc.get("type") != "dynamic":
1190                continue
1191            code = sc.get("code", "")
1192            if not code:
1193                continue
1194
1195            issues = []
1196
1197            # Strip `await` on sync agent methods
1198            fixed_code, n_subs = _re.subn(_sync_pat, r"\1", code)
1199            if n_subs:
1200                issues.append(f"removed {n_subs} spurious await(s) on sync agent methods")
1201                sc["code"] = fixed_code
1202                code = fixed_code
1203
1204            # Detect raw aiomqtt.Client() — LLM should use agent.subscribe()
1205            if "aiomqtt.Client(" in code or "aiomqtt.connect(" in code:
1206                issues.append("raw aiomqtt.Client() — should use agent.subscribe()")
1207                # Attempt to rewrite: extract topic and replace entire aiomqtt block
1208                # with agent.subscribe() pattern
1209                topics = _re.findall(r'await\s+client\.subscribe\(["\']([^"\']+)["\']', code)
1210                if topics:
1211                    topic = topics[0]
1212                    # Build replacement code using agent.subscribe()
1213                    fixed = self._rewrite_aiomqtt_to_subscribe(code, topic)
1214                    if fixed:
1215                        sc["code"] = fixed
1216                        code = fixed
1217                        logger.info(f"[{self.name}] Auto-fixed raw aiomqtt in '{step.get('name')}' → agent.subscribe('{topic}')")
1218
1219            # Detect direct HA REST API calls — should use ha_actuator instead
1220            _ha_api_patterns = [
1221                r'/api/services/',
1222                r'/api/states/',
1223                r'httpx.*api/services',
1224                r'requests\.(post|put|get).*api/services',
1225                r'aiohttp.*api/services',
1226            ]
1227            for pat in _ha_api_patterns:
1228                if _re.search(pat, code):
1229                    issues.append(
1230                        f"DIRECT HA API CALL detected ('{pat[:30]}...') — "
1231                        f"should use ha_actuator agent instead"
1232                    )
1233                    logger.warning(
1234                        f"[{self.name}] '{step.get('name')}' calls HA API directly! "
1235                        f"This will likely fail. Should use ha_actuator pattern: "
1236                        f"dynamic agent publishes trigger → ha_actuator executes HA service call."
1237                    )
1238                    break
1239
1240            if issues:
1241                logger.warning(
1242                    f"[{self.name}] Code issues in '{step.get('name')}': {'; '.join(issues)}"
1243                )
1244
1245        return plan
1246
1247    @staticmethod
1248    def _rewrite_aiomqtt_to_subscribe(code: str, topic: str) -> str:
1249        """
1250        Best-effort rewrite of raw aiomqtt MQTT subscription code to use agent.subscribe().
1251        Extracts the message handling callback and rewires it.
1252        Returns empty string if rewrite fails (original code kept).
1253        """
1254        import re as _re
1255
1256        # Try to extract the callback body — look for the inner async for loop body
1257        # Pattern: async for msg/message in client.messages: ... payload handling ...
1258        match = _re.search(
1259            r'async\s+for\s+\w+\s+in\s+client\.messages:\s*\n(.*?)(?=\n\s*except|\n\s*$)',
1260            code,
1261            _re.DOTALL,
1262        )
1263        if not match:
1264            return ""
1265
1266        callback_body = match.group(1)
1267
1268        # Detect how payload is parsed — json.loads(msg.payload) or similar
1269        payload_parse = ""
1270        if "json.loads" in callback_body:
1271            payload_parse = "    # payload is already a dict (parsed by agent.subscribe)\n"
1272
1273        # Strip leading indentation from callback body
1274        lines = callback_body.splitlines()
1275        min_indent = min((len(l) - len(l.lstrip()) for l in lines if l.strip()), default=4)
1276        dedented = "\n".join("    " + l[min_indent:] for l in lines if l.strip())
1277
1278        # Extract any setup code before the aiomqtt block
1279        pre_match = _re.split(r'async\s+with\s+aiomqtt\.Client', code)[0]
1280        pre_lines = [l for l in pre_match.splitlines()
1281                     if l.strip() and not l.strip().startswith("import aiomqtt")
1282                     and not l.strip().startswith("async def setup")]
1283        pre_code = "\n".join("    " + l.strip() for l in pre_lines if l.strip()) + "\n" if pre_lines else ""
1284
1285        rewritten = (
1286            f"async def setup(agent):\n"
1287            f"{pre_code}"
1288            f"    async def _on_message(payload):\n"
1289            f"{payload_parse}"
1290            f"{dedented}\n"
1291            f"    agent.subscribe('{topic}', _on_message)\n"
1292            f"    await agent.log('Subscribed to {topic}')\n"
1293        )
1294
1295        # Preserve any process() or handle_task() that existed
1296        import re as _re2
1297        for fn in ("process", "handle_task"):
1298            fn_match = _re2.search(rf'async\s+def\s+{fn}\s*\(', code)
1299            if fn_match:
1300                rewritten += "\n" + code[fn_match.start():]
1301                break
1302
1303        return rewritten
1304
1305    # ── Plan cache ─────────────────────────────────────────────────────────
1306
1307    def _load_cached_plan(self, cache_key: str, workers: list[dict]) -> Optional[list]:
1308        """Load a cached plan if it exists, is fresh, and all required agents are alive."""
1309        raw = self.recall(_PLAN_CACHE_KEY) or {}
1310        entry = raw.get(cache_key)
1311        if not entry:
1312            return None
1313
1314        # TTL check
1315        age = time.time() - entry.get("timestamp", 0)
1316        if age > _CACHE_TTL_S:
1317            logger.info(f"[{self.name}] Cache expired ({age/3600:.1f}h old)")
1318            return None
1319
1320        plan = entry.get("plan", [])
1321        if not plan:
1322            return None
1323
1324        # Validate all agents in the plan are still running
1325        alive = {w["name"] for w in workers} | {"main", self.name}
1326        for step in plan:
1327            agent = step.get("agent", "")
1328            if agent not in alive and not step.get("spawn_config"):
1329                logger.info(f"[{self.name}] Cache invalid — agent '{agent}' no longer running")
1330                return None
1331
1332        return plan
1333
1334    def _save_plan_cache(self, cache_key: str, task: str, plan: list):
1335        """Persist the plan so future similar tasks can reuse it."""
1336        raw = self.recall(_PLAN_CACHE_KEY) or {}
1337        # Evict entries older than TTL
1338        now = time.time()
1339        raw = {k: v for k, v in raw.items() if now - v.get("timestamp", 0) < _CACHE_TTL_S}
1340        raw[cache_key] = {
1341            "task":      task[:200],
1342            "plan":      plan,
1343            "timestamp": now,
1344        }
1345        self.persist(_PLAN_CACHE_KEY, raw)
1346
1347    # ── Worker discovery ───────────────────────────────────────────────────
1348
1349    def _discover_workers(self) -> list[dict]:
1350        if not self._registry:
1351            return []
1352        # Pull full manifests from main's capability registry (includes schemas)
1353        main = self._registry.find_by_name("main")
1354        manifest_map: dict = {}
1355        if main and hasattr(main, "list_capabilities"):
1356            for cap in main.list_capabilities():
1357                manifest_map[cap["name"]] = cap
1358
1359        workers = []
1360        for actor in self._registry.all_actors():
1361            if actor.name in _SKIP_AGENTS or actor.name == self.name:
1362                continue
1363            # Prefer manifest data (richer), fall back to live actor attrs
1364            manifest = manifest_map.get(actor.name, {})
1365            workers.append({
1366                "name":          actor.name,
1367                "type":          type(actor).__name__,
1368                "description":   (
1369                    manifest.get("description")
1370                    or getattr(actor, "description", "")
1371                    or getattr(actor, "system_prompt", "")[:100]
1372                    or type(actor).__name__
1373                ),
1374                "capabilities":  manifest.get("capabilities", []),
1375                "input_schema":  manifest.get("input_schema",  {}),
1376                "output_schema": manifest.get("output_schema", {}),
1377                "publishes":     manifest.get("publishes", []),
1378                "observed_samples": manifest.get("observed_samples", {}),
1379            })
1380        return workers
1381
1382    # ── Decomposition ──────────────────────────────────────────────────────
1383
1384    async def _decompose(self, task: str, workers: list[dict]) -> list[dict]:
1385        """LLM breaks task into steps. Can declare missing agents with spawn configs."""
1386        if not self.llm:
1387            return []
1388
1389        def _fmt_worker(w: dict) -> str:
1390            lines = [f"  - {w['name']} ({w['type']}): {w['description']}"]
1391            if w.get("capabilities"):
1392                lines.append(f"    capabilities: {', '.join(w['capabilities'])}")
1393            if w.get("input_schema"):
1394                lines.append(f"    input_schema : {w['input_schema']}")
1395            if w.get("output_schema"):
1396                lines.append(f"    output_schema: {w['output_schema']}")
1397            if w.get("publishes"):
1398                lines.append(f"    publishes: {w['publishes']}")
1399            if w.get("observed_samples"):
1400                for topic, info in w["observed_samples"].items():
1401                    fields = info.get("fields", {})
1402                    example = info.get("example", {})
1403                    lines.append(f"    topic '{topic}' payload fields: {fields}  example: {example}")
1404            return "\n".join(lines)
1405
1406        workers_desc = "\n".join(_fmt_worker(w) for w in workers)
1407
1408        # ── Gather live topic samples for schema context ──────────────────
1409        topic_schema_ctx = ""
1410        try:
1411            from ..core.topic_bus import get_topic_bus
1412            bus = get_topic_bus()
1413            if bus:
1414                sample_lines = []
1415                for contract in bus.registry.all_contracts():
1416                    samples = contract.observed_samples or {}
1417                    for topic, info in samples.items():
1418                        example = info.get("example", {})
1419                        fields  = info.get("fields", {})
1420                        sample_lines.append(
1421                            f"  {topic} (by {contract.name}): fields={fields}  example={example}"
1422                        )
1423                if not sample_lines:
1424                    sample_lines = await self._sample_live_topics(bus)
1425                if sample_lines:
1426                    topic_schema_ctx = (
1427                        "\n\nLIVE TOPIC SCHEMAS (use EXACTLY these field names in generated code):\n"
1428                        + "\n".join(sample_lines)
1429                        + "\nCRITICAL: Use the exact field names from the samples above. "
1430                        "If a sample shows 'temp', use payload['temp'] — NOT payload['temperature'].\n"
1431                    )
1432        except Exception:
1433            pass
1434
1435        prompt = f"""You are a task planner for a multi-agent system.
1436Break the task into steps. Each step is handled by one agent.
1437
1438AVAILABLE AGENTS (with input/output contracts):
1439{workers_desc}
1440{topic_schema_ctx}
1441TASK: {task}
1442
1443OUTPUT RULES:
1444- Respond ONLY with a valid JSON array. No explanation, no markdown.
1445- Each step object:
1446  {{
1447    "step": <int>,
1448    "agent": "<agent-name>",
1449    "task": "<what to ask this agent>",
1450    "parallel": <true|false>,
1451    "depends_on": [<step ints>],
1452    "spawn_config": <null or spawn object if agent needs to be created>
1453  }}
1454- "parallel": true if this step can run concurrently with other parallel steps
1455- "depends_on": step numbers whose results this step needs (empty list if none)
1456- "spawn_config": if the ideal agent for a step does NOT exist in the available list,
1457  include a spawn config to create it.
1458  AGENT TYPE RULES:
1459    Use "llm" ONLY for pure conversation/Q&A/explanation agents (no external APIs or tools).
1460    Use "dynamic" for anything that fetches data, calls APIs, runs searches, or uses libraries.
1461
1462    CRITICAL — sync vs async agent API methods:
1463      SYNCHRONOUS (NO await):
1464        agent.subscribe(topic, callback)  — fire-and-forget background task
1465        agent.window(topic, seconds=N)    — returns StreamWindow immediately
1466        agent.persist(key, val)           — save to disk
1467        agent.recall(key)                 — load from disk
1468        agent.declare_contract(...)       — register topic contract
1469        agent.agents()                    — list running agents
1470        agent.topics(keyword)             — list known topics
1471      ASYNC (MUST await):
1472        await agent.publish(topic, data)  — publish to MQTT
1473        await agent.log(msg)              — log a message
1474        await agent.alert(msg)            — trigger alert
1475        await agent.send_to(name, payload)— delegate to another agent
1476        await agent.mqtt_get(topic)       — one-shot MQTT read
1477
1478    NEVER use agent.logger — it does not exist. Use await agent.log(msg) instead.
1479
1480    CRITICAL — HOME ASSISTANT ACTIONS:
1481      NEVER call HA REST API directly from dynamic agent code (no httpx/requests to /api/services/).
1482      For ANY HA device action (turn on/off lights, switches, climate, etc.):
1483        Use "type": "ha_actuator" — NOT a dynamic agent with httpx.
1484        If a condition must be checked first, use TWO agents:
1485          1. Dynamic agent checks condition → publishes trigger to custom/triggers/<slug>
1486          2. ha_actuator agent subscribes to trigger → executes HA service call
1487      ha_actuator spawn_config example:
1488      {{
1489        "name": "lamp-off-actuator",
1490        "type": "ha_actuator",
1491        "description": "Turns off the lamp when triggered",
1492        "mqtt_topics": ["custom/triggers/lamp-temp"],
1493        "detection_filter": {{"triggered": true}},
1494        "actions": [{{"domain": "light", "service": "turn_off", "entity_id": "light.wiz_rgbw_tunable_02cba0"}}]
1495      }}
1496  LLM agent example:
1497  {{
1498    "name": "translator-agent",
1499    "type": "llm",
1500    "system_prompt": "You are an expert translator. Translate text accurately."
1501  }}
1502  Dynamic agent example (for weather, news, search, APIs):
1503  {{
1504    "name": "weather-agent",
1505    "type": "dynamic",
1506    "description": "Fetches live weather data for a city",
1507    "input_schema":  {{"city": "str — city name to fetch weather for"}},
1508    "output_schema": {{"city": "str", "temp_c": "str", "description": "str"}},
1509    "poll_interval": 3600,
1510    "code": "async def setup(agent):\n    await agent.log('ready')\nasync def process(agent):\n    import asyncio\n    await asyncio.sleep(3600)\nasync def handle_task(agent, payload):\n    import httpx\n    city = payload.get('city', 'Athens')\n    async with httpx.AsyncClient(timeout=10) as c:\n        r = await c.get(f'https://wttr.in/{{city}}?format=j1')\n        d = r.json()\n    cur = d['current_condition'][0]\n    return {{'city': city, 'temp_c': cur['temp_C'], 'description': cur['weatherDesc'][0]['value']}}"
1511  }}
1512- The FINAL synthesis step should ALWAYS be assigned to "main" (not any other agent).
1513  Main will combine results using its LLM. Never assign synthesis to a domain agent.
1514- Only create new agents when TRULY necessary — prefer existing agents.
1515- If one agent can handle everything, output a single-step plan.
1516- Keep it minimal — avoid unnecessary steps.
1517- IMPORTANT: For any step that combines, summarizes, synthesizes or compares results
1518  from other steps, ALWAYS use "agent": "main" — never a domain agent.
1519- Domain agents (weather, news, manual, etc.) are for DATA RETRIEVAL only.
1520  "main" handles all reasoning, summarization and synthesis.
1521
1522Example:
1523[
1524  {{"step": 1, "agent": "weather-agent", "task": "Get weather in Athens", "parallel": true, "depends_on": [], "spawn_config": null}},
1525  {{"step": 2, "agent": "news-agent", "task": "Get AI news today", "parallel": true, "depends_on": [], "spawn_config": null}},
1526  {{"step": 3, "agent": "main", "task": "Summarize the weather and news results", "parallel": false, "depends_on": [1, 2], "spawn_config": null}}
1527]"""
1528
1529        try:
1530            response, _ = await self.llm.complete(
1531                messages=[{"role": "user", "content": prompt}],
1532                system="You are a JSON-only task planner. Output only valid JSON arrays, nothing else.",
1533                max_tokens=1500,
1534            )
1535            clean = response.strip()
1536            # Strip markdown fences
1537            if clean.startswith("```"):
1538                clean = "\n".join(clean.split("\n")[1:])
1539            if clean.endswith("```"):
1540                clean = "\n".join(clean.split("\n")[:-1])
1541            plan = json.loads(clean.strip())
1542            if isinstance(plan, list) and plan:
1543                return plan
1544        except Exception as e:
1545            logger.error(f"[{self.name}] Decomposition error: {e}")
1546        return []
1547
1548    # ── Missing agent spawning ─────────────────────────────────────────────
1549
1550    async def _ensure_agents(self, plan: list[dict]) -> list[dict]:
1551        """
1552        For any step with a spawn_config, spawn the agent if it's not running.
1553        Updates the plan with the actual agent name once spawned.
1554
1555        Continuous agents (those with a process() loop or subscribe-based setup)
1556        are marked with _spawn_only=True so _execute_step skips delegation —
1557        spawning them WAS the action.
1558        """
1559        if not self._registry:
1560            return plan
1561
1562        for step in plan:
1563            spawn_config = step.get("spawn_config")
1564            if not spawn_config:
1565                continue
1566
1567            agent_name = spawn_config.get("name") or step.get("agent")
1568            existing   = self._registry.find_by_name(agent_name)
1569
1570            if existing:
1571                await self._log(f"Agent '{agent_name}' already running — skipping spawn")
1572                step["agent"] = agent_name
1573                continue
1574
1575            await self._log(f"Spawning missing agent: '{agent_name}'")
1576            try:
1577                actor = await self._spawn_agent(spawn_config)
1578                if actor:
1579                    step["agent"] = agent_name
1580                    self._spawned_by_planner.append(agent_name)
1581
1582                    # Detect if this is a continuous/persistent agent.
1583                    # If the code has a process() loop or uses agent.subscribe(),
1584                    # delegation via TASK would just timeout — spawning IS the action.
1585                    code = spawn_config.get("code", "")
1586                    is_continuous = bool(
1587                        spawn_config.get("type") == "dynamic"
1588                        and code
1589                        and (
1590                            "def process(" in code
1591                            or "agent.subscribe(" in code
1592                            or "agent.window(" in code
1593                        )
1594                        # Only if there's no meaningful handle_task that does work
1595                        and "def handle_task(" not in code
1596                    )
1597                    if is_continuous:
1598                        step["_spawn_only"] = True
1599                        await self._log(
1600                            f"'{agent_name}' is continuous — spawn is the action, skipping delegation"
1601                        )
1602
1603                    # Brief pause to let agent initialise
1604                    await asyncio.sleep(1.0)
1605                    await self._log(f"'{agent_name}' ready.")
1606                else:
1607                    await self._log(f"Failed to spawn '{agent_name}' — step will use main as fallback")
1608                    step["agent"] = "main"
1609            except Exception as e:
1610                logger.error(f"[{self.name}] Spawn of '{agent_name}' failed: {e}")
1611                step["agent"] = "main"
1612
1613        return plan
1614
1615    async def _spawn_agent(self, config: dict) -> Optional[Actor]:
1616        """Spawn an agent from a config dict — same logic as MainActor._spawn_from_config."""
1617        agent_type = config.get("type", "dynamic")
1618        name       = config.get("name", "spawned-agent")
1619
1620        if agent_type == "ha_actuator":
1621            from .home_assistant_actuator_agent import (
1622                HomeAssistantActuatorAgent, ActuatorConfig,
1623                ActuatorAction, ActuatorCondition,
1624            )
1625            # Ensure automation_id is unique — append short hash if needed
1626            automation_id = config.get("automation_id", name)
1627            if self._registry and self._registry.find_by_name(f"actuator-{automation_id[:20]}"):
1628                import hashlib
1629                suffix = hashlib.md5(f"{automation_id}{time.time()}".encode()).hexdigest()[:4]
1630                automation_id = f"{automation_id}-{suffix}"
1631                name = f"actuator-{automation_id[:20]}"
1632            actuator_config = ActuatorConfig(
1633                automation_id = automation_id,
1634                description   = config.get("description", ""),
1635                mqtt_topics   = config.get("mqtt_topics", []),
1636                actions       = [ActuatorAction.from_dict(a) for a in config.get("actions", [])],
1637                conditions    = [ActuatorCondition.from_dict(c) for c in config.get("conditions", [])],
1638                detection_filter = config.get("detection_filter"),
1639                cooldown_seconds = float(config.get("cooldown_seconds", 10.0)),
1640            )
1641            actor = await self.spawn(
1642                HomeAssistantActuatorAgent,
1643                config=actuator_config,
1644                name=name,
1645                persistence_dir=str(self._persistence_dir.parent),
1646            )
1647            await self._register_with_main(config)
1648            return actor
1649
1650        if agent_type == "llm":
1651            from .llm_agent import LLMAgent
1652            actor = await self.spawn(
1653                LLMAgent,
1654                name=name,
1655                llm_provider=self.llm,
1656                system_prompt=config.get("system_prompt", "You are a helpful assistant."),
1657                persistence_dir=str(self._persistence_dir.parent),
1658            )
1659            # Save to main's spawn registry so it persists across restarts
1660            await self._register_with_main(config)
1661            return actor
1662
1663        if agent_type == "dynamic":
1664            code = config.get("code", "").strip()
1665            if not code:
1666                logger.warning(f"[{self.name}] Dynamic spawn config has no code for '{name}'")
1667                return None
1668            from .dynamic_agent import DynamicAgent
1669            actor = await self.spawn(
1670                DynamicAgent,
1671                name=name,
1672                code=code,
1673                poll_interval=float(config.get("poll_interval") or 1.0),
1674                description=config.get("description", ""),
1675                input_schema=config.get("input_schema", {}),
1676                output_schema=config.get("output_schema", {}),
1677                llm_provider=self.llm,
1678                persistence_dir=str(self._persistence_dir.parent),
1679            )
1680            await self._register_with_main(config)
1681            return actor
1682
1683        if agent_type == "manual":
1684            from .manual_agent import ManualAgent
1685            actor = await self.spawn(
1686                ManualAgent,
1687                name=name,
1688                llm_provider=self.llm,
1689                persistence_dir=str(self._persistence_dir.parent),
1690            )
1691            await self._register_with_main(config)
1692            return actor
1693
1694        logger.warning(f"[{self.name}] Unknown agent type: '{agent_type}'")
1695        return None
1696
1697    async def _register_with_main(self, config: dict):
1698        """Tell main to add this agent to its spawn registry so it survives restarts."""
1699        if not self._registry:
1700            return
1701        main = self._registry.find_by_name("main")
1702        if main and hasattr(main, "_save_to_spawn_registry"):
1703            main._save_to_spawn_registry(config)
1704            logger.info(f"[{self.name}] Registered '{config.get('name')}' with main's spawn registry")
1705
1706    # ── Execution ──────────────────────────────────────────────────────────
1707
1708    async def _execute(self, plan: list[dict]) -> dict:
1709        results:   dict       = {}
1710        completed: set[int]   = set()
1711        remaining: list[dict] = list(plan)
1712
1713        while remaining:
1714            ready = [
1715                s for s in remaining
1716                if all(d in completed for d in (s.get("depends_on") or []))
1717            ]
1718            if not ready:
1719                logger.error(f"[{self.name}] Plan deadlock — aborting remaining steps")
1720                break
1721
1722            parallel   = [s for s in ready if s.get("parallel", False)]
1723            sequential = [s for s in ready if not s.get("parallel", False)]
1724
1725            if parallel:
1726                await self._log(f"Parallel: steps {[s['step'] for s in parallel]}")
1727                outputs = await asyncio.gather(
1728                    *[self._execute_step(s, results) for s in parallel],
1729                    return_exceptions=True,
1730                )
1731                for step, out in zip(parallel, outputs):
1732                    results[step["step"]] = out if not isinstance(out, Exception) else {"error": str(out)}
1733                    completed.add(step["step"])
1734                    remaining.remove(step)
1735
1736            for step in sequential:
1737                await self._log(f"Sequential: step {step['step']} → @{step['agent']}")
1738                results[step["step"]] = await self._execute_step(step, results)
1739                completed.add(step["step"])
1740                remaining.remove(step)
1741
1742        return results
1743
1744    async def _execute_step(self, step: dict, prior: dict) -> dict:
1745        agent_name = step.get("agent", "main")
1746        task_text  = step.get("task", "")
1747        depends_on = step.get("depends_on") or []
1748
1749        # Continuous agents (process loop / subscribe-based) were already started
1750        # by _ensure_agents — spawning them WAS the action. Don't send a TASK
1751        # that would just timeout because there's no handle_task to respond.
1752        if step.get("_spawn_only"):
1753            await self._log(f"  ✓ @{agent_name}: spawned and running (continuous agent)")
1754            return {
1755                "result": f"Agent '{agent_name}' spawned and running continuously.",
1756                "spawned": True,
1757            }
1758
1759        # Inject context from prior steps
1760        if depends_on:
1761            ctx = []
1762            for dep in depends_on:
1763                r = prior.get(dep, {})
1764                t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:600]
1765                ctx.append(f"[Step {dep} result]: {t}")
1766            if ctx:
1767                task_text += "\n\nContext from previous steps:\n" + "\n".join(ctx)
1768
1769        if agent_name in ("main", self.name):
1770            return {"result": await self._llm_answer(task_text)}
1771
1772        await self._log(f"  → @{agent_name}: {task_text[:60]}")
1773        result = await self._delegate(agent_name, task_text)
1774        if not result:
1775            return {"error": f"No response from {agent_name}"}
1776        # If agent reported an error, check if we can replan around it
1777        if "error" in result and "error_phase" in result:
1778            await self._log(
1779                f"  ⚠ @{agent_name} failed ({result['error_phase']}): {result['error'][:80]}"
1780            )
1781            # Try main as fallback synthesizer
1782            await self._log(f"  → falling back to @main for this step")
1783            fallback = await self._llm_answer(
1784                f"The agent '{agent_name}' failed. Do your best to answer: {task_text}"
1785            )
1786            return {"result": fallback, "fallback": True, "original_error": result["error"]}
1787        return result
1788
1789    # ── Delegation ─────────────────────────────────────────────────────────
1790
1791    async def _delegate(self, agent_name: str, task: str, timeout: float = 60.0) -> Optional[dict]:
1792        return await self._delegate_with_payload(agent_name, {"text": task}, timeout=timeout)
1793
1794    async def _delegate_with_payload(self, agent_name: str, payload: dict, timeout: float = 60.0) -> Optional[dict]:
1795        if not self._registry:
1796            return None
1797        target = self._registry.find_by_name(agent_name)
1798        if not target:
1799            logger.warning(f"[{self.name}] Agent '{agent_name}' not found for delegation")
1800            return {"error": f"Agent '{agent_name}' not found"}
1801
1802        import uuid
1803        task_id = str(uuid.uuid4())[:8]
1804        future: asyncio.Future = asyncio.get_running_loop().create_future()
1805        self._result_futures[task_id] = future
1806
1807        await self.send(target.actor_id, MessageType.TASK, {
1808            **payload, "_task_id": task_id, "_reply_to": self.actor_id
1809        })
1810        try:
1811            return await asyncio.wait_for(future, timeout=timeout)
1812        except asyncio.TimeoutError:
1813            logger.warning(f"[{self.name}] Timeout from '{agent_name}'")
1814            return {"error": f"Timeout from {agent_name}"}
1815        finally:
1816            self._result_futures.pop(task_id, None)
1817
1818    # ── Synthesis ──────────────────────────────────────────────────────────
1819
1820    async def _synthesize(self, task: str, plan: list[dict], results: dict) -> str:
1821        # If every step was a spawn-only continuous agent, skip LLM synthesis
1822        # and return a clean confirmation — no need to "summarize" spawns.
1823        all_spawned = all(
1824            isinstance(results.get(s["step"]), dict)
1825            and results[s["step"]].get("spawned")
1826            for s in plan
1827        )
1828        if all_spawned:
1829            agents = [s["agent"] for s in plan]
1830            lines = [f"Done! Spawned {len(agents)} continuous agent(s):\n"]
1831            for s in plan:
1832                desc = ""
1833                sc = s.get("spawn_config") or {}
1834                desc = sc.get("description", s.get("task", ""))
1835                lines.append(f"• **{s['agent']}** — {desc}")
1836            lines.append("\nThey're running now and will auto-restore on restart.")
1837            return "\n".join(lines)
1838
1839        if not self.llm:
1840            parts = []
1841            for s in plan:
1842                r = results.get(s["step"], {})
1843                t = r.get("result") or r.get("text") or r.get("answer") or str(r)
1844                parts.append(f"[@{s['agent']}]: {t}")
1845            return "\n\n".join(parts)
1846
1847        results_text = []
1848        for s in plan:
1849            r = results.get(s["step"], {})
1850            t = (r.get("result") or r.get("text") or r.get("answer") or str(r))[:800]
1851            results_text.append(f"Step {s['step']} (@{s['agent']}): {t}")
1852
1853        prompt = (
1854            f"You collected results from multiple agents for this task:\n\n"
1855            f"ORIGINAL TASK: {task}\n\n"
1856            f"RESULTS:\n" + "\n\n".join(results_text) +
1857            "\n\nSynthesize into a single, clear, well-structured answer for the user. "
1858            "Do not mention agent names, step numbers, or internal system details."
1859        )
1860        try:
1861            response, _ = await self.llm.complete(
1862                messages=[{"role": "user", "content": prompt}],
1863                system="You synthesize multi-agent results into clean, user-facing answers.",
1864                max_tokens=2048,
1865            )
1866            return response
1867        except Exception as e:
1868            logger.error(f"[{self.name}] Synthesis failed: {e}")
1869            return "\n\n".join(results_text)
1870
1871    async def _llm_answer(self, task: str) -> str:
1872        if not self.llm:
1873            return f"[No LLM available: {task}]"
1874        try:
1875            response, _ = await self.llm.complete(
1876                messages=[{"role": "user", "content": task}],
1877                system="You are a helpful assistant.",
1878                max_tokens=2048,
1879            )
1880            return response
1881        except Exception as e:
1882            return f"[LLM error: {e}]"
1883
1884    # ── Helpers ────────────────────────────────────────────────────────────
1885
1886    async def _deferred_stop(self):
1887        await asyncio.sleep(2.0)
1888        await self._log("Self-terminating.")
1889        if self._registry:
1890            await self._registry.unregister(self.actor_id)
1891        await self.stop()
1892
1893    async def _log(self, msg: str):
1894        logger.info(f"[{self.name}] {msg}")
1895        await self._mqtt_publish(
1896            f"agents/{self.actor_id}/logs",
1897            {"type": "log", "message": msg, "timestamp": time.time()},
1898        )

On-demand orchestrator. Spawned per complex task, self-terminates when done.

PlannerAgent( llm_provider: Optional[wactorz.agents.llm_agent.LLMProvider] = None, task: str = '', reply_to_id: str = '', reply_task_id: str = '', auto_terminate: bool = True, **kwargs)
42    def __init__(
43        self,
44        llm_provider:   Optional[LLMProvider] = None,
45        task:           str = "",
46        reply_to_id:    str = "",
47        reply_task_id:  str = "",
48        auto_terminate: bool = True,
49        **kwargs,
50    ):
51        kwargs.setdefault("name", "planner")
52        super().__init__(**kwargs)
53        self.llm              = llm_provider
54        self._task            = task
55        self._reply_to_id     = reply_to_id
56        self._reply_task_id   = reply_task_id
57        self._auto_terminate  = auto_terminate
58        self._result_futures: dict[str, asyncio.Future] = {}
59        self._spawned_by_planner: list[str] = []   # agents we created this run
llm
async def on_start(self):
66    async def on_start(self):
67        await self._log(f"Planner ready. Task: {self._task[:80]}")
68        if self._task:
69            asyncio.create_task(self._report_plan(self._task))

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
73    async def handle_message(self, msg: Message):
74        if msg.type == MessageType.TASK:
75            payload   = msg.payload if isinstance(msg.payload, dict) else {"text": str(msg.payload)}
76            task_text = payload.get("text") or payload.get("task") or str(msg.payload)
77            self._reply_to_id = payload.get("_reply_to") or msg.reply_to or msg.sender_id or self._reply_to_id
78            task_id           = payload.get("_task_id")
79            await self._log(f"Received task: {task_text[:80]}")
80            result = await self._run_plan(task_text)
81            if self._reply_to_id:
82                # Use the initiating task_id (from main) so the future resolves,
83                # falling back to the message-level task_id if present
84                resolve_id = self._reply_task_id or task_id
85                reply = {"result": result, "text": result}
86                if resolve_id:
87                    reply["_task_id"] = resolve_id
88                if self._spawned_by_planner:
89                    reply["spawned"] = self._spawned_by_planner
90                await self.send(self._reply_to_id, MessageType.RESULT, reply)
91
92        elif msg.type == MessageType.RESULT:
93            payload = msg.payload if isinstance(msg.payload, dict) else {}
94            task_id = payload.get("_task_id")
95            if task_id and task_id in self._result_futures:
96                fut = self._result_futures[task_id]
97                if not fut.done():
98                    fut.set_result(payload)

Handle messages not caught by default handlers.

class DynamicAgent(wactorz.Actor):
 56class DynamicAgent(Actor):
 57    """
 58    Generic actor shell. Core behavior is provided as Python source code strings.
 59    The LLM writes setup/process/handle_task functions; this class runs them.
 60    """
 61
 62    def __init__(
 63        self,
 64        code: str,                          # LLM-generated Python source
 65        poll_interval: float = 1.0,         # seconds between process() calls
 66        description: str = "",              # what this agent does
 67        input_schema: dict = None,          # expected task payload fields
 68        output_schema: dict = None,         # returned result fields
 69        llm_provider=None,                  # optional LLM for agent.llm.chat()
 70        trusted: bool = False,              # True = catalog agent, skip safety validator
 71        **kwargs,
 72    ):
 73        super().__init__(**kwargs)
 74        self._code           = code
 75        self.poll_interval   = poll_interval
 76        self.description     = description
 77        self.input_schema    = input_schema  or {}
 78        self.output_schema   = output_schema or {}
 79        self._llm_provider   = llm_provider
 80        self._trusted        = trusted       # catalog agents bypass safety checks
 81
 82        # Compiled functions — populated in on_start
 83        self._fn_setup       = None
 84        self._fn_process     = None
 85        self._fn_handle_task = None
 86
 87        # Namespace shared across all calls (agent can store state here)
 88        self._ns: dict       = {}
 89
 90        # Cost tracking (populated by _LLMInterface if LLM is used)
 91        self.total_input_tokens  = 0
 92        self.total_output_tokens = 0
 93        self.total_cost_usd      = 0.0
 94
 95        # Error tracking for health classification
 96        self._consecutive_errors: int   = 0
 97        self._error_threshold:    int   = 3      # DEGRADED after this many
 98        self._last_error_time:    float = 0.0
 99        self._error_phase:        str   = ""     # compile|setup|process|handle_task
100
101        # Public API exposed to generated code via `agent` parameter
102        self._api            = _AgentAPI(self)
103
104    # ── Lifecycle ──────────────────────────────────────────────────────────
105
106    async def on_start(self):
107        # ── Compile with LLM self-correction on syntax errors ─────────────
108        current_code = self._code
109        error_msg    = self._compile_code(current_code)
110
111        if error_msg:
112            for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
113                logger.warning(
114                    f"[{self.name}] Compile error (attempt {attempt}): {error_msg}"
115                )
116                fixed = await self._fix_syntax_with_llm(current_code, error_msg)
117                if fixed is None:
118                    # LLM unavailable — no point retrying
119                    break
120                self._ns = {}                      # fresh namespace for retry
121                new_err = self._compile_code(fixed)
122                if new_err is None:
123                    # Fix worked — update stored code so restarts use the good version
124                    self._code = fixed
125                    error_msg  = None
126                    logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
127                    await self._mqtt_publish(
128                        f"agents/{self.actor_id}/logs",
129                        {"type": "log",
130                         "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
131                         "timestamp": time.time()},
132                    )
133                    break
134                # Fix compiled but still broken — feed it back for the next attempt
135                current_code = fixed
136                error_msg    = new_err
137
138        if error_msg:
139            # All attempts exhausted — publish fatal and stop
140            err_exc = SyntaxError(error_msg)
141            logger.error(f"[{self.name}] Code compilation failed permanently: {error_msg}")
142            await self._publish_error(phase="compile", error=err_exc,
143                                      traceback_str=error_msg, fatal=True)
144            return
145
146        # ── setup() ───────────────────────────────────────────────────────
147        if self._fn_setup:
148            # Run setup as a background task so long-running loops (e.g. aiomqtt
149            # subscriptions) don't block on_start() and prevent heartbeats from firing.
150            self._tasks.append(asyncio.create_task(self._run_setup()))
151        else:
152            if self._fn_process:
153                self._tasks.append(asyncio.create_task(self._process_loop()))
154
155        # Publish manifest immediately so main's registry knows this agent exists
156        # even if it never calls publish() (pure handle_task agents, etc.)
157        await self._api._publish_manifest()
158
159    async def on_stop(self):
160        # ── Unregister from TopicBus so stale contracts don't accumulate ───
161        try:
162            from ..core.topic_bus import get_topic_bus
163            bus = get_topic_bus()
164            if bus:
165                bus.unregister(self.name)
166                logger.debug(f"[{self.name}] Unregistered from TopicBus")
167        except Exception:
168            pass  # TopicBus unavailable — not fatal
169
170        # ── Give generated code a chance to clean up ───────────────────────
171        cleanup = self._ns.get("cleanup")
172        if cleanup:
173            try:
174                await asyncio.wait_for(cleanup(self._api), timeout=10.0)
175            except asyncio.TimeoutError:
176                logger.warning(f"[{self.name}] cleanup() timed out after 10s")
177            except Exception as e:
178                logger.warning(f"[{self.name}] cleanup() error: {e}")
179
180        # ── Force-release common resources that LLM code may have opened ───
181        # Even if cleanup() didn't run or missed something, we try to release
182        # known resource types stored in agent.state.
183        state = getattr(self._api, 'state', {}) if self._api else {}
184
185        # Release cv2 VideoCapture handles
186        for key in list(state.keys()):
187            obj = state.get(key)
188            if obj is None:
189                continue
190            # cv2.VideoCapture
191            if hasattr(obj, 'release') and hasattr(obj, 'isOpened'):
192                try:
193                    if obj.isOpened():
194                        obj.release()
195                        logger.info(f"[{self.name}] Released camera handle '{key}'")
196                except Exception:
197                    pass
198            # Close any open file handles
199            elif hasattr(obj, 'close') and hasattr(obj, 'closed'):
200                try:
201                    if not obj.closed:
202                        obj.close()
203                        logger.debug(f"[{self.name}] Closed file handle '{key}'")
204                except Exception:
205                    pass
206
207        # ── Cancel any tasks spawned inside setup/process code ─────────────
208        # Generated code may have called asyncio.create_task() directly without
209        # adding to _tasks. We can't track those, but we can ensure all tasks
210        # we DO track are properly cancelled and awaited.
211        for task in self._tasks:
212            if not task.done():
213                task.cancel()
214        # Give cancelled tasks a moment to actually stop
215        if self._tasks:
216            await asyncio.gather(*self._tasks, return_exceptions=True)
217
218    # ── Code compilation ───────────────────────────────────────────────────
219
220    @staticmethod
221    def _sanitize_code(code: str) -> str:
222        """
223        Block-aware sanitizer. Removes LLM self-setup patterns entirely:
224        - try/except blocks containing LLM imports
225        - if/else blocks checking api_key or llm_backend
226        - orphan else:/elif: that follow sanitized blocks
227        - call_llm/call_openai/call_ollama functions -> agent.llm shim
228        - standalone bad lines
229        """
230        import re
231
232        LLM_PATTERNS = [
233            r"\bimport\s+(openai|anthropic|ollama|langchain)\b",
234            r"\bfrom\s+(openai|anthropic|ollama|langchain)\b",
235            r"\b(OPENAI_API_KEY|ANTHROPIC_API_KEY)\b",
236            r"os\.environ.*API_KEY",
237            r"\b(openai|anthropic|ollama)\.(OpenAI|Anthropic|Client|AsyncOpenAI|AsyncAnthropic)\b",
238            # api_key as a variable assignment (not as a dict key like 'api_key': ...)
239            r"^\s*api_key\s*=",
240            # llm_backend as a variable assignment only
241            r"^\s*agent\.state\[.llm_backend.\]\s*=",
242        ]
243
244        def line_is_bad(line):
245            return any(re.search(p, line) for p in LLM_PATTERNS)
246
247        def collect_block(lines, start, base_indent, conts=("except","else","finally","elif")):
248            j, block = start, []
249            pat = r"\s*(" + "|".join(conts) + r")\b" if conts else r"(?!x)x"
250            while j < len(lines):
251                bl = lines[j]
252                bl_ind = len(bl) - len(bl.lstrip()) if bl.strip() else base_indent + 4
253                if bl.strip() and bl_ind <= base_indent and not re.match(pat, bl):
254                    break
255                block.append(bl)
256                j += 1
257            return block, j
258
259        lines  = code.split("\n")
260        result = []
261        i      = 0
262        last_sanitized = False
263
264        while i < len(lines):
265            line     = lines[i]
266            stripped = line.strip()
267            indent   = len(line) - len(line.lstrip()) if stripped else 0
268            prefix   = " " * indent
269
270            if not stripped:
271                result.append(line)
272                last_sanitized = False
273                i += 1
274                continue
275
276            # try: blocks — nuke entirely if they touch LLM
277            if stripped == "try:":
278                block, j = collect_block(lines, i + 1, indent)
279                full = [line] + block
280                if any(line_is_bad(l) for l in full):
281                    result.append(prefix + "pass  # sanitized: LLM setup block")
282                    last_sanitized = True
283                else:
284                    result.extend(full)
285                    last_sanitized = False
286                i = j
287                continue
288
289            # if/elif whose condition references LLM vars — nuke whole branch
290            if re.match(r"\s*(if|elif)\b", line) and line_is_bad(line):
291                _, j = collect_block(lines, i + 1, indent, ("elif", "else"))
292                result.append(prefix + "pass  # sanitized: LLM conditional")
293                last_sanitized = True
294                i = j
295                continue
296
297            # orphan else:/elif: after a sanitized block — drop silently
298            if re.match(r"\s*(else\s*:|elif\b)", line) and last_sanitized:
299                _, j = collect_block(lines, i + 1, indent, ())
300                i = j
301                continue
302
303            # LLM wrapper functions — replace with agent.llm shim
304            fn_m = re.match(
305                r"(\s*)(async\s+)?def\s+"
306                r"(call_llm|call_openai|call_ollama|call_anthropic|call_gpt|"
307                r"get_llm|setup_llm|create_llm|query_llm|ask_llm|llm_call)\s*\(",
308                line,
309            )
310            if fn_m:
311                _, j = collect_block(lines, i + 1, len(fn_m.group(1)), ())
312                p, fname = fn_m.group(1), fn_m.group(3)
313                result += [
314                    p + "async def " + fname + "(agent, messages, system='', **kw):",
315                    p + "    # sanitized: rewired to agent.llm",
316                    p + "    sys_p = system or next((m.get('content','') for m in messages if m.get('role')=='system'), '')",
317                    p + "    msgs  = [m for m in messages if m.get('role') != 'system']",
318                    p + "    return await agent.llm.complete(messages=msgs, system=sys_p)",
319                ]
320                last_sanitized = False
321                i = j
322                continue
323
324            # standalone bad lines
325            if line_is_bad(line):
326                result.append(prefix + "pass  # sanitized: " + stripped[:60])
327                last_sanitized = True
328                i += 1
329                continue
330
331            last_sanitized = False
332            result.append(line)
333            i += 1
334
335        sanitized = "\n".join(result)
336
337        # ── Strip spurious `await` on known synchronous agent API methods ──
338        # LLMs write `await agent.subscribe(...)` because setup() is async.
339        # These methods already return _AwaitableNone so the code won't crash,
340        # but stripping `await` keeps the code clean and avoids confusion.
341        _SYNC_METHODS = (
342            "subscribe", "window", "persist", "recall",
343            "declare_contract", "agents", "nodes", "topics",
344            "capabilities", "increment_processed", "increment_errors",
345        )
346        _sync_pat = r"\bawait\s+(agent\.(?:" + "|".join(_SYNC_METHODS) + r")\s*\()"
347        sanitized = re.sub(_sync_pat, r"\1", sanitized)
348
349        return sanitized
350
351
352
353
    # Max times on_start will ask the LLM to fix a compile error before giving
    # up and publishing a fatal "compile" error.
    _MAX_COMPILE_RETRIES = 2
356
357    # ── Pre-exec safety validator ──────────────────────────────────────────
358    # Scans sanitized code for dangerous patterns BEFORE exec().
359    # This is NOT a sandbox — it's a best-effort blocklist.
360    # For true isolation, run DynamicAgents in a subprocess or container.
361
362    _BLOCKED_PATTERNS = [
363        # System-level access
364        (r'\bos\.system\s*\(',              "os.system() — use subprocess instead or avoid shell commands"),
365        (r'\bos\.popen\s*\(',               "os.popen() — use subprocess instead"),
366        (r'\bos\.exec[a-z]*\s*\(',          "os.exec*() — direct process replacement not allowed"),
367        (r'\bos\.remove\s*\(',              "os.remove() — file deletion not allowed in agent code"),
368        (r'\bos\.rmdir\s*\(',               "os.rmdir() — directory deletion not allowed"),
369        (r'\bshutil\.rmtree\s*\(',          "shutil.rmtree() — recursive deletion not allowed"),
370        (r'\bsubprocess\.(?:call|run|Popen)\s*\(.{0,20}rm\s',
371                                            "subprocess with rm — destructive shell command"),
372        # Network abuse
373        (r'\bsocket\.socket\s*\(',          "raw socket creation — use httpx or agent.publish instead"),
374        # Code execution / eval
375        (r'\beval\s*\(',                    "eval() — arbitrary code execution not allowed"),
376        (r'\b__import__\s*\(',              "__import__() — use regular import statements"),
377        # File system writes outside agent scope
378        (r'\bopen\s*\([^)]*["\'][wab]["\']', "open() in write mode — use agent.persist() instead"),
379    ]
380
381    # Patterns that are suspicious but allowed — just logged as warnings
382    _WARN_PATTERNS = [
383        (r'\bsubprocess\b',                 "subprocess usage — ensure this is necessary"),
384        (r'\bctypes\b',                     "ctypes — low-level C interface, use with caution"),
385        (r'\bpickle\.loads?\b',             "pickle — deserialization risk if data is untrusted"),
386        (r'\bwhile\s+True\s*:(?!.*await)',  "tight while-True loop without await — may block event loop"),
387    ]
388
389    def _validate_code_safety(self, code: str) -> Optional[str]:
390        """
391        Scan sanitized code for dangerous patterns before exec().
392
393        Returns an error message string if blocked, None if OK.
394        Warnings are logged but don't block execution.
395        """
396        import re
397
398        for pattern, reason in self._BLOCKED_PATTERNS:
399            if re.search(pattern, code):
400                logger.warning(f"[{self.name}] BLOCKED dangerous code pattern: {reason}")
401                return f"Code blocked for safety: {reason}"
402
403        for pattern, reason in self._WARN_PATTERNS:
404            if re.search(pattern, code):
405                logger.warning(f"[{self.name}] Safety warning: {reason}")
406
407        return None  # OK
408
409    def _compile_code(self, code: Optional[str] = None) -> Optional[str]:
410        """
411        Sanitize, validate safety, then compile LLM-generated code into self._ns.
412
413        Returns the error message string if compilation fails, None on success.
414        Callers use the error string to ask the LLM to fix the code and retry
415        (see on_start / _fix_syntax_with_llm).
416
417        Trusted agents (from the catalog) skip the safety validator — their code
418        is pre-built and tested, and may legitimately use __import__, subprocess,
419        etc. that the safety validator would block.
420        """
421        source = code if code is not None else self._code
422        clean  = self._sanitize_code(source) if not self._trusted else source
423
424        # ── Safety check before exec (skipped for trusted/catalog agents) ──
425        if not self._trusted:
426            safety_error = self._validate_code_safety(clean)
427            if safety_error:
428                return safety_error
429        else:
430            logger.info(f"[{self.name}] Trusted agent — skipping safety validator")
431
432        # Pre-inject the LLM shim so generated code can call agent.llm directly
433        def _get_llm_shim(*args, **kwargs):
434            return self._api.llm
435        self._ns["get_llm"]    = _get_llm_shim
436        self._ns["setup_llm"]  = _get_llm_shim
437        self._ns["create_llm"] = _get_llm_shim
438
439        try:
440            exec(compile(clean, f"<{self.name}>", "exec"), self._ns)
441            self._fn_setup       = self._ns.get("setup")
442            self._fn_process     = self._ns.get("process")
443            self._fn_handle_task = self._ns.get("handle_task")
444            fns = [f for f in ["setup", "process", "handle_task", "cleanup"] if f in self._ns]
445            logger.info(f"[{self.name}] Code compiled OK. Functions: {fns}")
446            if not fns:
447                logger.warning(f"[{self.name}] No functions found in compiled code.")
448            return None   # success
449        except Exception as e:
450            return f"{type(e).__name__}: {e}"
451
452    async def _fix_syntax_with_llm(self, bad_code: str, error_msg: str) -> Optional[str]:
453        """
454        Ask the configured LLM to fix a syntax error in agent code.
455
456        Returns the (possibly still-broken) code string from the LLM, or None
457        only if the LLM is completely unavailable (no provider, API error).
458        The caller is responsible for verifying the fix with _compile_code().
459        """
460        if self._llm_provider is None:
461            return None
462
463        prompt = (
464            "The following Python code has a syntax error.\n"
465            f"Error: {error_msg}\n\n"
466            "Fix ONLY the syntax error. Do not change logic or add features.\n"
467            "Return ONLY the corrected Python code — no explanations, "
468            "no markdown fences, no commentary.\n\n"
469            f"```python\n{bad_code}\n```"
470        )
471        logger.info(f"[{self.name}] Asking LLM to fix syntax error: {error_msg[:120]}")
472        await self._mqtt_publish(
473            f"agents/{self.actor_id}/logs",
474            {"type": "log",
475             "message": f"Syntax error — asking LLM to fix: {error_msg[:120]}",
476             "timestamp": time.time()},
477        )
478        try:
479            response, usage = await self._llm_provider.complete(
480                messages=[{"role": "user", "content": prompt}],
481                system="You are a Python syntax expert. Return only valid Python code.",
482                max_tokens=4096,
483            )
484            # Track cost
485            if hasattr(self, "total_input_tokens"):
486                self.total_input_tokens  += usage.get("input_tokens", 0)
487                self.total_output_tokens += usage.get("output_tokens", 0)
488                self.total_cost_usd      += usage.get("cost_usd", 0.0)
489
490            # Strip markdown fences the LLM may add despite instructions
491            fixed = response.strip()
492            if fixed.startswith("```"):
493                fixed = "\n".join(
494                    l for l in fixed.split("\n")
495                    if not l.strip().startswith("```")
496                ).strip()
497
498            return fixed   # caller validates with _compile_code()
499
500        except Exception as e:
501            logger.warning(f"[{self.name}] LLM fix call failed: {e}")
502            return None    # only None when LLM is truly unreachable
503
504    # ── Setup wrapper ───────────────────────────────────────────────────────
505
    # Max times _run_setup will ask the LLM to fix a runtime error before giving up.
    # setup() itself is therefore attempted at most 1 + _MAX_SETUP_RETRIES times.
    _MAX_SETUP_RETRIES = 2
508
509    async def _run_setup(self):
510        """
511        Run setup() as a background task with LLM self-correction on failure.
512
513        If setup() raises a runtime error (e.g. TypeError from await on sync call,
514        NameError, AttributeError), the LLM is asked to fix the code and the whole
515        compile-then-setup cycle is retried up to _MAX_SETUP_RETRIES times.
516
517        - If process() is also defined, it is started AFTER setup() returns.
518          For agents whose setup() never returns (e.g. aiomqtt subscription loops),
519          process() is simply not started — the subscription loop IS the process.
520        """
521        current_code = self._code
522        last_error   = None
523
524        for attempt in range(1 + self._MAX_SETUP_RETRIES):
525            try:
526                await self._fn_setup(self._api)
527                if attempt > 0:
528                    logger.info(f"[{self.name}] setup() succeeded after {attempt} fix(es).")
529                    await self._mqtt_publish(
530                        f"agents/{self.actor_id}/logs",
531                        {"type": "log",
532                         "message": f"setup() runtime error fixed by LLM after {attempt} attempt(s).",
533                         "timestamp": time.time()},
534                    )
535                else:
536                    logger.info(f"[{self.name}] setup() completed.")
537                last_error = None
538                break
539            except asyncio.CancelledError:
540                return
541            except Exception as e:
542                last_error = e
543                err = traceback.format_exc()
544                logger.error(f"[{self.name}] setup() failed (attempt {attempt + 1}): {e}")
545
546                if attempt >= self._MAX_SETUP_RETRIES:
547                    break  # exhausted retries
548
549                # Ask LLM to fix the runtime error
550                fixed = await self._fix_runtime_with_llm(current_code, str(e), err)
551                if fixed is None:
552                    logger.warning(f"[{self.name}] LLM unavailable — cannot fix setup() error")
553                    break
554
555                # Recompile the fixed code
556                self._ns = {}
557                compile_err = self._compile_code(fixed)
558                if compile_err:
559                    logger.warning(f"[{self.name}] LLM fix introduced compile error: {compile_err}")
560                    # Try to fix the compile error too
561                    fixed2 = await self._fix_syntax_with_llm(fixed, compile_err)
562                    if fixed2:
563                        self._ns = {}
564                        compile_err2 = self._compile_code(fixed2)
565                        if compile_err2:
566                            break  # can't fix compile error either
567                        fixed = fixed2
568                    else:
569                        break
570                else:
571                    # compile_err is None — code is good
572                    pass
573
574                self._code   = fixed
575                current_code = fixed
576                logger.info(f"[{self.name}] Retrying setup() with LLM-fixed code (attempt {attempt + 1})...")
577
578        if last_error is not None:
579            err = traceback.format_exc()
580            logger.error(f"[{self.name}] setup() failed permanently: {last_error}")
581            await self._publish_error(
582                phase="setup", error=last_error, traceback_str=err, fatal=True
583            )
584            return
585
586        # setup() returned cleanly — start process() loop if defined
587        if self._fn_process and self.state not in (ActorState.STOPPED, ActorState.FAILED):
588            self._tasks.append(asyncio.create_task(self._process_loop()))
589
590    async def _fix_runtime_with_llm(
591        self, code: str, error_msg: str, traceback_str: str
592    ) -> Optional[str]:
593        """
594        Ask the LLM to fix a runtime error in agent code (setup/process).
595
596        Similar to _fix_syntax_with_llm but provides the traceback and
597        explicit guidance about the agent API (sync vs async methods).
598        """
599        if self._llm_provider is None:
600            return None
601
602        prompt = (
603            "The following Python code raised a RUNTIME ERROR when executed.\n\n"
604            f"Error: {error_msg}\n"
605            f"Traceback (last 800 chars):\n{traceback_str[-800:]}\n\n"
606            "IMPORTANT API RULES — these are the most common mistakes:\n"
607            "  - agent.subscribe(topic, callback) is SYNCHRONOUS — do NOT use await\n"
608            "  - agent.window(topic, seconds=N) is SYNCHRONOUS — do NOT use await\n"
609            "  - agent.persist(key, val) is SYNCHRONOUS — do NOT use await\n"
610            "  - agent.recall(key) is SYNCHRONOUS — do NOT use await\n"
611            "  - agent.declare_contract(...) is SYNCHRONOUS — do NOT use await\n"
612            "  - agent.agents() is SYNCHRONOUS — do NOT use await\n"
613            "  - await agent.publish(topic, data) — this IS async, use await\n"
614            "  - await agent.log(msg) — this IS async, use await\n"
615            "  - await agent.alert(msg) — this IS async, use await\n"
616            "  - await agent.send_to(name, payload) — this IS async, use await\n"
617            "  - await agent.mqtt_get(topic) — this IS async, use await\n\n"
618            "STREAMWINDOW API — w = agent.window('topic', seconds=N):\n"
619            "  StreamWindow is NOT a dict. Use methods, not dict-style access.\n"
620            "  Methods: count(), mean('field'), min('field'), max('field'),\n"
621            "           values('field'), latest(), rising('field', threshold=X),\n"
622            "           falling(), stable(), absent_for(seconds),\n"
623            "           event_count(key='k', value=V, seconds=N)\n"
624            "  WRONG: w.get('temp')        — StreamWindow is not a dict\n"
625            "  WRONG: w['temp']            — no __getitem__ by key intended\n"
626            "  RIGHT: w.latest()           — returns latest payload dict (or None)\n"
627            "  RIGHT: w.values('temp')     — list of all 'temp' values in window\n"
628            "  RIGHT: w.mean('temp')       — average of 'temp' over window\n\n"
629            "Fix the error. Return ONLY the corrected Python code — no explanations, "
630            "no markdown fences, no commentary.\n\n"
631            f"```python\n{code}\n```"
632        )
633        logger.info(f"[{self.name}] Asking LLM to fix runtime error: {error_msg[:120]}")
634        await self._mqtt_publish(
635            f"agents/{self.actor_id}/logs",
636            {"type": "log",
637             "message": f"Runtime error — asking LLM to fix: {error_msg[:120]}",
638             "timestamp": time.time()},
639        )
640        try:
641            response, usage = await self._llm_provider.complete(
642                messages=[{"role": "user", "content": prompt}],
643                system=(
644                    "You are a Python runtime-error expert for an async agent framework. "
645                    "Return only valid Python code."
646                ),
647                max_tokens=4096,
648            )
649            if hasattr(self, "total_input_tokens"):
650                self.total_input_tokens  += usage.get("input_tokens", 0)
651                self.total_output_tokens += usage.get("output_tokens", 0)
652                self.total_cost_usd      += usage.get("cost_usd", 0.0)
653
654            fixed = response.strip()
655            if fixed.startswith("```"):
656                fixed = "\n".join(
657                    l for l in fixed.split("\n")
658                    if not l.strip().startswith("```")
659                ).strip()
660            return fixed
661
662        except Exception as e:
663            logger.warning(f"[{self.name}] LLM runtime-fix call failed: {e}")
664            return None
665
666    # ── Process loop ───────────────────────────────────────────────────────
667
    # Max time a single process() or handle_task() call can take before
    # we assume it's stuck in a blocking call and cancel it.
    # Both limits are applied through asyncio.wait_for().
    _PROCESS_TIMEOUT = 120.0    # seconds
    _HANDLE_TASK_TIMEOUT = 60.0  # seconds
672
673    async def _process_loop(self):
674        """Continuously call the generated process() function."""
675        while self.state not in (ActorState.STOPPED, ActorState.FAILED):
676            if self.state == ActorState.PAUSED:
677                await asyncio.sleep(self.poll_interval)
678                continue
679            try:
680                await asyncio.wait_for(
681                    self._fn_process(self._api),
682                    timeout=self._PROCESS_TIMEOUT,
683                )
684                self._reset_error_count()
685            except asyncio.TimeoutError:
686                self.metrics.errors += 1
687                logger.error(
688                    f"[{self.name}] process() timed out after {self._PROCESS_TIMEOUT}s "
689                    f"— likely a blocking call without run_in_executor"
690                )
691                await self._publish_error(
692                    phase="process",
693                    error=TimeoutError(f"process() exceeded {self._PROCESS_TIMEOUT}s"),
694                    traceback_str=f"process() did not return within {self._PROCESS_TIMEOUT}s. "
695                                  f"Wrap blocking calls (cv2, torch) in: "
696                                  f"await asyncio.get_event_loop().run_in_executor(None, fn)",
697                )
698                backoff = min(2 ** self._consecutive_errors, 30)
699                await asyncio.sleep(backoff)
700            except asyncio.CancelledError:
701                break
702            except Exception as e:
703                self.metrics.errors += 1
704                tb = traceback.format_exc()
705                logger.error(f"[{self.name}] process() error: {e}\n{tb}")
706                await self._publish_error(phase="process", error=e, traceback_str=tb)
707                backoff = min(2 ** self._consecutive_errors, 30)
708                await asyncio.sleep(backoff)
709            await asyncio.sleep(self.poll_interval)
710
711    # ── Message handling ───────────────────────────────────────────────────
712
713    async def handle_message(self, msg: Message):
714        if msg.type == MessageType.TASK:
715            self.metrics.messages_processed += 1
716            if self._fn_handle_task:
717                try:
718                    result = await asyncio.wait_for(
719                        self._fn_handle_task(self._api, msg.payload or {}),
720                        timeout=self._HANDLE_TASK_TIMEOUT,
721                    )
722                    if msg.sender_id and result is not None:
723                        await self.send(msg.sender_id, MessageType.RESULT, result)
724                except asyncio.TimeoutError:
725                    logger.error(
726                        f"[{self.name}] handle_task() timed out after "
727                        f"{self._HANDLE_TASK_TIMEOUT}s"
728                    )
729                    await self._publish_error(
730                        phase="handle_task",
731                        error=TimeoutError(f"handle_task() exceeded {self._HANDLE_TASK_TIMEOUT}s"),
732                        traceback_str="",
733                    )
734                    if msg.sender_id:
735                        await self.send(msg.sender_id, MessageType.RESULT, {
736                            "error": f"handle_task() timed out after {self._HANDLE_TASK_TIMEOUT}s",
737                            "error_phase": "handle_task",
738                            "agent": self.name,
739                        })
740                except Exception as e:
741                    tb = traceback.format_exc()
742                    logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
743                    await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
744                    if msg.sender_id:
745                        await self.send(msg.sender_id, MessageType.RESULT, {
746                            "error":       str(e),
747                            "error_phase": "handle_task",
748                            "agent":       self.name,
749                        })
750            else:
751                if msg.sender_id:
752                    await self.send(msg.sender_id, MessageType.RESULT,
753                                    {"info": f"{self.name} has no handle_task defined"})
754
755    async def _publish_error(
756        self,
757        phase: str,
758        error: Exception,
759        traceback_str: str = "",
760        fatal: bool = False,
761    ):
762        """
763        Publish a structured error event to agents/{id}/errors AND send
764        a direct actor message to MonitorAgent so it works without MQTT.
765        """
766        self._consecutive_errors += 1
767        self._last_error_time     = time.time()
768        self._error_phase         = phase
769        severity = (
770            "critical"
771            if fatal or self._consecutive_errors >= self._error_threshold
772            else "warning"
773        )
774        event = {
775            "actor_id":    self.actor_id,
776            "name":        self.name,
777            "phase":       phase,
778            "error":       str(error),
779            "traceback":   traceback_str[-1200:] if traceback_str else "",
780            "consecutive": self._consecutive_errors,
781            "fatal":       fatal,
782            "severity":    severity,
783            "degraded":    self._consecutive_errors >= self._error_threshold,
784            "timestamp":   time.time(),
785        }
786        await self._mqtt_publish(f"agents/{self.actor_id}/errors", event)
787        # Direct actor message to monitor (works without MQTT broker)
788        if self._registry:
789            monitor = self._registry.find_by_name("monitor")
790            if monitor and monitor.actor_id != self.actor_id:
791                try:
792                    await self.send(monitor.actor_id, MessageType.TASK, {
793                        **event,
794                        "_monitor_error_event": True,
795                    })
796                except Exception:
797                    pass
798        # Mirror to /alert so the dashboard picks it up immediately
799        await self._mqtt_publish(f"agents/{self.actor_id}/alert", {
800            "actor_id":  self.actor_id,
801            "name":      self.name,
802            "message":   f"[{phase}] {error}",
803            "severity":  severity,
804            "timestamp": time.time(),
805        })
806
807    def _reset_error_count(self):
808        if self._consecutive_errors > 0:
809            logger.info(f"[{self.name}] Recovered — resetting error counter.")
810            self._consecutive_errors = 0
811            self._error_phase        = ""
812
813    def get_status(self) -> dict:
814        s = super().get_status()
815        s["description"] = self.description
816        s["code"]        = self._code
817        s["agent_type"]  = "dynamic"
818        return s
819
820    def _build_heartbeat(self) -> dict:
821        hb = super()._build_heartbeat()
822        hb["code"]        = self._code      # include code in every heartbeat
823        hb["description"] = self.description
824        hb["agent_type"]  = "dynamic"
825        return hb
826
827    def _current_task_description(self) -> str:
828        return self.description or "running dynamic code"

Generic actor shell. Core behavior is provided as Python source code strings. The LLM writes setup/process/handle_task functions; this class runs them.

DynamicAgent( code: str, poll_interval: float = 1.0, description: str = '', input_schema: dict = None, output_schema: dict = None, llm_provider=None, trusted: bool = False, **kwargs)
 62    def __init__(
 63        self,
 64        code: str,                          # LLM-generated Python source
 65        poll_interval: float = 1.0,         # seconds between process() calls
 66        description: str = "",              # what this agent does
 67        input_schema: dict = None,          # expected task payload fields
 68        output_schema: dict = None,         # returned result fields
 69        llm_provider=None,                  # optional LLM for agent.llm.chat()
 70        trusted: bool = False,              # True = catalog agent, skip safety validator
 71        **kwargs,
 72    ):
 73        super().__init__(**kwargs)
 74        self._code           = code
 75        self.poll_interval   = poll_interval
 76        self.description     = description
 77        self.input_schema    = input_schema  or {}
 78        self.output_schema   = output_schema or {}
 79        self._llm_provider   = llm_provider
 80        self._trusted        = trusted       # catalog agents bypass safety checks
 81
 82        # Compiled functions — populated in on_start
 83        self._fn_setup       = None
 84        self._fn_process     = None
 85        self._fn_handle_task = None
 86
 87        # Namespace shared across all calls (agent can store state here)
 88        self._ns: dict       = {}
 89
 90        # Cost tracking (populated by _LLMInterface if LLM is used)
 91        self.total_input_tokens  = 0
 92        self.total_output_tokens = 0
 93        self.total_cost_usd      = 0.0
 94
 95        # Error tracking for health classification
 96        self._consecutive_errors: int   = 0
 97        self._error_threshold:    int   = 3      # DEGRADED after this many
 98        self._last_error_time:    float = 0.0
 99        self._error_phase:        str   = ""     # compile|setup|process|handle_task
100
101        # Public API exposed to generated code via `agent` parameter
102        self._api            = _AgentAPI(self)
poll_interval
description
input_schema
output_schema
total_input_tokens
total_output_tokens
total_cost_usd
async def on_start(self):
106    async def on_start(self):
107        # ── Compile with LLM self-correction on syntax errors ─────────────
108        current_code = self._code
109        error_msg    = self._compile_code(current_code)
110
111        if error_msg:
112            for attempt in range(1, self._MAX_COMPILE_RETRIES + 1):
113                logger.warning(
114                    f"[{self.name}] Compile error (attempt {attempt}): {error_msg}"
115                )
116                fixed = await self._fix_syntax_with_llm(current_code, error_msg)
117                if fixed is None:
118                    # LLM unavailable — no point retrying
119                    break
120                self._ns = {}                      # fresh namespace for retry
121                new_err = self._compile_code(fixed)
122                if new_err is None:
123                    # Fix worked — update stored code so restarts use the good version
124                    self._code = fixed
125                    error_msg  = None
126                    logger.info(f"[{self.name}] Code fixed by LLM after {attempt} attempt(s).")
127                    await self._mqtt_publish(
128                        f"agents/{self.actor_id}/logs",
129                        {"type": "log",
130                         "message": f"Syntax error fixed by LLM after {attempt} attempt(s).",
131                         "timestamp": time.time()},
132                    )
133                    break
134                # Fix compiled but still broken — feed it back for the next attempt
135                current_code = fixed
136                error_msg    = new_err
137
138        if error_msg:
139            # All attempts exhausted — publish fatal and stop
140            err_exc = SyntaxError(error_msg)
141            logger.error(f"[{self.name}] Code compilation failed permanently: {error_msg}")
142            await self._publish_error(phase="compile", error=err_exc,
143                                      traceback_str=error_msg, fatal=True)
144            return
145
146        # ── setup() ───────────────────────────────────────────────────────
147        if self._fn_setup:
148            # Run setup as a background task so long-running loops (e.g. aiomqtt
149            # subscriptions) don't block on_start() and prevent heartbeats from firing.
150            self._tasks.append(asyncio.create_task(self._run_setup()))
151        else:
152            if self._fn_process:
153                self._tasks.append(asyncio.create_task(self._process_loop()))
154
155        # Publish manifest immediately so main's registry knows this agent exists
156        # even if it never calls publish() (pure handle_task agents, etc.)
157        await self._api._publish_manifest()

Called when the actor starts. Override for initialization logic.

async def on_stop(self):
159    async def on_stop(self):
160        # ── Unregister from TopicBus so stale contracts don't accumulate ───
161        try:
162            from ..core.topic_bus import get_topic_bus
163            bus = get_topic_bus()
164            if bus:
165                bus.unregister(self.name)
166                logger.debug(f"[{self.name}] Unregistered from TopicBus")
167        except Exception:
168            pass  # TopicBus unavailable — not fatal
169
170        # ── Give generated code a chance to clean up ───────────────────────
171        cleanup = self._ns.get("cleanup")
172        if cleanup:
173            try:
174                await asyncio.wait_for(cleanup(self._api), timeout=10.0)
175            except asyncio.TimeoutError:
176                logger.warning(f"[{self.name}] cleanup() timed out after 10s")
177            except Exception as e:
178                logger.warning(f"[{self.name}] cleanup() error: {e}")
179
180        # ── Force-release common resources that LLM code may have opened ───
181        # Even if cleanup() didn't run or missed something, we try to release
182        # known resource types stored in agent.state.
183        state = getattr(self._api, 'state', {}) if self._api else {}
184
185        # Release cv2 VideoCapture handles
186        for key in list(state.keys()):
187            obj = state.get(key)
188            if obj is None:
189                continue
190            # cv2.VideoCapture
191            if hasattr(obj, 'release') and hasattr(obj, 'isOpened'):
192                try:
193                    if obj.isOpened():
194                        obj.release()
195                        logger.info(f"[{self.name}] Released camera handle '{key}'")
196                except Exception:
197                    pass
198            # Close any open file handles
199            elif hasattr(obj, 'close') and hasattr(obj, 'closed'):
200                try:
201                    if not obj.closed:
202                        obj.close()
203                        logger.debug(f"[{self.name}] Closed file handle '{key}'")
204                except Exception:
205                    pass
206
207        # ── Cancel any tasks spawned inside setup/process code ─────────────
208        # Generated code may have called asyncio.create_task() directly without
209        # adding to _tasks. We can't track those, but we can ensure all tasks
210        # we DO track are properly cancelled and awaited.
211        for task in self._tasks:
212            if not task.done():
213                task.cancel()
214        # Give cancelled tasks a moment to actually stop
215        if self._tasks:
216            await asyncio.gather(*self._tasks, return_exceptions=True)

Called when actor stops. Override for cleanup.

async def handle_message(self, msg: Message):
713    async def handle_message(self, msg: Message):
714        if msg.type == MessageType.TASK:
715            self.metrics.messages_processed += 1
716            if self._fn_handle_task:
717                try:
718                    result = await asyncio.wait_for(
719                        self._fn_handle_task(self._api, msg.payload or {}),
720                        timeout=self._HANDLE_TASK_TIMEOUT,
721                    )
722                    if msg.sender_id and result is not None:
723                        await self.send(msg.sender_id, MessageType.RESULT, result)
724                except asyncio.TimeoutError:
725                    logger.error(
726                        f"[{self.name}] handle_task() timed out after "
727                        f"{self._HANDLE_TASK_TIMEOUT}s"
728                    )
729                    await self._publish_error(
730                        phase="handle_task",
731                        error=TimeoutError(f"handle_task() exceeded {self._HANDLE_TASK_TIMEOUT}s"),
732                        traceback_str="",
733                    )
734                    if msg.sender_id:
735                        await self.send(msg.sender_id, MessageType.RESULT, {
736                            "error": f"handle_task() timed out after {self._HANDLE_TASK_TIMEOUT}s",
737                            "error_phase": "handle_task",
738                            "agent": self.name,
739                        })
740                except Exception as e:
741                    tb = traceback.format_exc()
742                    logger.error(f"[{self.name}] handle_task() error: {e}\n{tb}")
743                    await self._publish_error(phase="handle_task", error=e, traceback_str=tb)
744                    if msg.sender_id:
745                        await self.send(msg.sender_id, MessageType.RESULT, {
746                            "error":       str(e),
747                            "error_phase": "handle_task",
748                            "agent":       self.name,
749                        })
750            else:
751                if msg.sender_id:
752                    await self.send(msg.sender_id, MessageType.RESULT,
753                                    {"info": f"{self.name} has no handle_task defined"})

Handle messages not caught by default handlers.

def get_status(self) -> dict:
813    def get_status(self) -> dict:
814        s = super().get_status()
815        s["description"] = self.description
816        s["code"]        = self._code
817        s["agent_type"]  = "dynamic"
818        return s
class InstallerAgent(wactorz.Actor):
 74class InstallerAgent(Actor):
 75    """
 76    Pre-defined agent that installs Python packages on demand.
 77    Uses sys.executable so packages are installed into the active venv.
 78    """
 79
 80    def __init__(self, **kwargs):
 81        kwargs.setdefault("name", "installer")
 82        super().__init__(**kwargs)
 83        self.protected    = True
 84        self._install_log: list[dict] = []
 85
 86    def _current_task_description(self) -> str:
 87        return "idle"
 88
 89    async def on_start(self):
 90        logger.info(f"[{self.name}] Installer ready — using: {sys.executable}")
 91        await self._mqtt_publish(
 92            f"agents/{self.actor_id}/logs",
 93            {"type": "log", "message": f"Installer ready ({sys.executable})", "timestamp": time.time()},
 94        )
 95        await self.publish_manifest(
 96            description="Installs Python packages on demand via pip",
 97            capabilities=["pip_install", "package_management"],
 98        )
 99
100    async def handle_message(self, msg: Message):
101        if msg.type == MessageType.TASK:
102            result = await self._handle_install(msg)
103            # Echo task_id back so caller's future can resolve
104            if isinstance(msg.payload, dict):
105                task_id = msg.payload.get("task") or msg.payload.get("_task_id")
106                if task_id:
107                    result["task"] = task_id
108                    result["_task_id"] = task_id
109            target = msg.reply_to or msg.sender_id
110            if target:
111                await self.send(target, MessageType.RESULT, result)
112
113    async def _handle_install(self, msg: Message) -> dict:
114        payload = msg.payload if isinstance(msg.payload, dict) else {}
115        action  = payload.get("action", "install")
116
117        if action == "install":
118            packages = payload.get("packages", [])
119            if isinstance(packages, str):
120                packages = [p.strip() for p in packages.replace(",", " ").split()]
121            return await self._install_packages(packages)
122
123        if action == "check":
124            packages = payload.get("packages", [])
125            if isinstance(packages, str):
126                packages = [p.strip() for p in packages.replace(",", " ").split()]
127            return self._check_packages(packages)
128
129        if action == "resolve":
130            return self._resolve_imports(payload.get("imports", []))
131
132        if action == "history":
133            return {"history": self._install_log[-20:]}
134
135        if action == "node_install":
136            # Install packages on a remote node via SSH
137            # payload: {host, user, packages, password (opt), key_path (opt)}
138            return await self._node_install(payload)
139
140        if action == "node_deploy":
141            # Full bootstrap: copy remote_runner.py + install deps + start runner
142            # payload: {host, user, node_name, broker, password (opt), key_path (opt)}
143            return await self._node_deploy(payload)
144
145        if action == "node_install_for_agent":
146            # Install packages needed by a specific agent on its remote node
147            # payload: {host, user, packages, agent_name, password (opt), key_path (opt)}
148            return await self._node_install(payload)
149
150        if action == "node_run":
151            # Run an arbitrary command on a remote node via SSH
152            # payload: {host, user, command, password (opt), key_path (opt)}
153            return await self._node_run(payload)
154
155        return {"error": f"Unknown action: {action}"}
156
157    # ── Core install logic ──────────────────────────────────────────────────
158
159    async def _install_packages(self, packages: list[str]) -> dict:
160        if not packages:
161            return {"error": "No packages specified"}
162
163        results = {}
164        failed  = []
165
166        for pkg in packages:
167            pkg = pkg.strip()
168            if not pkg:
169                continue
170
171            # Resolve import name → pip name (e.g. "cv2" → "opencv-python")
172            pip_name = IMPORT_TO_PACKAGE.get(pkg, pkg)
173
174            # Check if already importable (invalidate cache so fresh installs show up)
175            import_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)
176            if self._is_installed(import_name):
177                logger.info(f"[{self.name}] {pip_name} already installed.")
178                results[pip_name] = "already_installed"
179                continue
180
181            logger.info(f"[{self.name}] Installing {pip_name} into {sys.executable}...")
182            await self._mqtt_publish(
183                f"agents/{self.actor_id}/logs",
184                {"type": "log", "message": f"Installing {pip_name}...", "timestamp": time.time()},
185            )
186
187            success, output = await self._pip_install(pip_name)
188
189            # duckduckgo-search was renamed to ddgs in v9 — try the other name as fallback
190            if not success and pip_name in ("duckduckgo-search", "ddgs"):
191                alt = "ddgs" if pip_name == "duckduckgo-search" else "duckduckgo-search"
192                logger.info(f"[{self.name}] Trying alternative name: {alt}")
193                success, output = await self._pip_install(alt)
194                if success:
195                    pip_name = alt
196
197            # pdfplumber sometimes fails on Windows — try pymupdf (fitz) as fallback
198            if not success and pip_name == "pdfplumber":
199                logger.info(f"[{self.name}] pdfplumber failed, trying pymupdf as fallback...")
200                success, output = await self._pip_install("pymupdf")
201                if success:
202                    pip_name = "pymupdf"
203
204            results[pip_name] = "installed" if success else f"failed: {output[-300:]}"
205            if not success:
206                failed.append(pip_name)
207
208            self._install_log.append({
209                "package":   pip_name,
210                "success":   success,
211                "timestamp": time.time(),
212                "output":    output[-500:],
213            })
214
215            if success:
216                status = f"✓ {pip_name} installed"
217            else:
218                # Show the actual pip error so failures are diagnosable
219                err_snippet = output[-400:].strip().replace("\n", " | ")
220                status = f"✗ {pip_name} FAILED: {err_snippet}"
221            logger.info(f"[{self.name}] {status}")
222            await self._mqtt_publish(
223                f"agents/{self.actor_id}/logs",
224                {"type": "log", "message": status, "timestamp": time.time()},
225            )
226
227        return {
228            "results": results,
229            "failed":  failed,
230            "success": len(failed) == 0,
231            "message": f"Installed {len(results) - len(failed)}/{len(results)} packages",
232        }
233
234    async def _pip_install(self, package: str) -> tuple[bool, str]:
235        """Run pip install using the same interpreter that launched this process.
236
237        sys.executable inside a venv points to  venv/Scripts/python.exe  (Windows)
238        or  venv/bin/python  (Linux/Mac), so packages always land in the right place.
239
240        Uses subprocess.run() in a thread executor instead of asyncio.create_subprocess_exec()
241        because asyncio subprocesses are unreliable on Windows with SelectorEventLoop
242        (the default in some Python versions / environments). subprocess.run() works
243        correctly on all platforms.
244        """
245        import subprocess
246
247        cmd = [sys.executable, "-m", "pip", "install", package, "--quiet"]
248        if sys.platform != "win32":
249            cmd.append("--break-system-packages")
250
251        def _run_pip() -> tuple[bool, str]:
252            try:
253                result = subprocess.run(
254                    cmd,
255                    stdout=subprocess.PIPE,
256                    stderr=subprocess.PIPE,
257                    timeout=180,
258                )
259                output = (result.stdout + result.stderr).decode("utf-8", errors="replace")
260                return result.returncode == 0, output
261            except subprocess.TimeoutExpired:
262                return False, "pip timed out after 180s"
263            except FileNotFoundError:
264                return False, f"Python executable not found: {sys.executable}"
265            except Exception as e:
266                return False, f"{type(e).__name__}: {e}"
267
268        try:
269            loop    = asyncio.get_event_loop()
270            success, output = await loop.run_in_executor(None, _run_pip)
271
272            if success:
273                # Refresh import machinery so the new package is visible immediately
274                importlib.invalidate_caches()
275
276            return success, output
277
278        except Exception as e:
279            return False, f"Executor error: {type(e).__name__}: {e}"
280
281    def _is_installed(self, import_name: str) -> bool:
282        """Check importability, always refreshing the import cache first."""
283        importlib.invalidate_caches()
284        try:
285            importlib.import_module(import_name)
286            return True
287        except ImportError:
288            return False
289
290    # ── Helper actions ──────────────────────────────────────────────────────
291
292    def _check_packages(self, packages: list[str]) -> dict:
293        status = {}
294        for pkg in packages:
295            pip_name    = IMPORT_TO_PACKAGE.get(pkg, pkg)
296            import_name = PACKAGE_TO_IMPORT.get(pip_name, pip_name)
297            status[pkg] = "installed" if self._is_installed(import_name) else "missing"
298        return {"status": status}
299
300    def _resolve_imports(self, imports: list[str]) -> dict:
301        return {"resolved": {imp: IMPORT_TO_PACKAGE.get(imp, imp) for imp in imports}}
302
303    # ── Remote node helpers (SSH via asyncssh) ──────────────────────────────
304
305    def _ssh_kwargs(self, payload: dict) -> dict:
306        """
307        Build asyncssh connection kwargs from a task payload.
308        Falls back to persisted credentials from a previous node_deploy
309        so callers don't need to pass password/key_path every time.
310        """
311        host      = payload["host"]
312        user      = payload.get("user", "pi")
313        password  = payload.get("password")
314        key_path  = payload.get("key_path")
315
316        # Fall back to persisted credentials if not in payload
317        # Try to find node_name from host
318        if not password and not key_path:
319            for key in self._state.keys() if hasattr(self, "_state") else []:
320                pass
321            # Scan persisted node credentials by matching host
322            node_name = payload.get("node_name") or payload.get("node")
323            if not node_name:
324                # Try to find node by host
325                for k, v in (self.recall("_node_credentials") or {}).items():
326                    if v.get("host") == host:
327                        node_name = k
328                        break
329            if node_name:
330                creds    = (self.recall("_node_credentials") or {}).get(node_name, {})
331                password = password or creds.get("password")
332                key_path = key_path or creds.get("key_path")
333                user     = user or creds.get("user", "pi")
334
335        kwargs = dict(
336            host        = host,
337            username    = user,
338            known_hosts = None,   # disable host key checking for LAN deploys
339        )
340        if password:
341            kwargs["password"] = password
342        if key_path:
343            kwargs["client_keys"] = [key_path]
344        return kwargs
345
346    def _persist_node_credentials(self, node_name: str, host: str, user: str,
347                                   password: str = None, key_path: str = None):
348        """Store SSH credentials for a node so future connections don't need them passed explicitly."""
349        creds = self.recall("_node_credentials") or {}
350        creds[node_name] = {
351            "host":     host,
352            "user":     user,
353            "password": password or "",
354            "key_path": key_path or "",
355        }
356        self.persist("_node_credentials", creds)
357        # Also persist individually for backward compat with _spawn_remote lookups
358        self.persist(f"node_host_{node_name}", host)
359        self.persist(f"node_user_{node_name}", user)
360        logger.info(f"[{self.name}] Persisted SSH credentials for node '{node_name}'")
361
362    async def _ssh_run(self, conn, command: str) -> tuple[bool, str]:
363        """Run a single command over an open SSH connection. Returns (ok, output)."""
364        result = await conn.run(command, check=False)
365        output = (result.stdout or "") + (result.stderr or "")
366        return result.exit_status == 0, output.strip()
367
368    def _log_remote(self, message: str):
369        logger.info(f"[{self.name}] {message}")
370        asyncio.create_task(self._mqtt_publish(
371            f"agents/{self.actor_id}/logs",
372            {"type": "log", "message": message, "timestamp": time.time()},
373        ))
374
375    async def _node_install(self, payload: dict) -> dict:
376        """
377        Install pip packages on a remote node via SSH.
378
379        payload keys:
380          host      — IP or hostname of the remote machine
381          user      — SSH username (default: "pi")
382          packages  — list of package names to install
383          password  — SSH password (optional, prefer key auth)
384          key_path  — path to SSH private key (optional)
385        """
386        try:
387            import asyncssh
388        except ImportError:
389            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
390
391        host     = payload.get("host")
392        packages = payload.get("packages", [])
393        if isinstance(packages, str):
394            packages = [p.strip() for p in packages.replace(",", " ").split()]
395        if not host:
396            return {"error": "Missing 'host' in payload"}
397        if not packages:
398            return {"error": "No packages specified"}
399
400        pkg_str = " ".join(packages)
401        self._log_remote(f"Installing {pkg_str} on {host}...")
402
403        try:
404            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
405                # Detect the right pip to use:
406                # 1. Venv at ~/wactorz/venv (created by node_deploy) — always prefer this
407                # 2. Fall back to python3 -m pip with --break-system-packages
408                ok, venv_check = await self._ssh_run(
409                    conn, "test -f ~/wactorz/venv/bin/pip && echo yes || echo no"
410                )
411                if venv_check.strip() == "yes":
412                    pip_cmd = f"~/wactorz/venv/bin/pip install {pkg_str} -q 2>&1"
413                    self._log_remote(f"Using venv pip at ~/wactorz/venv/bin/pip")
414                else:
415                    # No venv — try to create one first
416                    self._log_remote("No venv found — creating ~/wactorz/venv first...")
417                    await self._ssh_run(conn, "mkdir -p ~/wactorz && python3 -m venv ~/wactorz/venv")
418                    ok, venv_check2 = await self._ssh_run(
419                        conn, "test -f ~/wactorz/venv/bin/pip && echo yes || echo no"
420                    )
421                    if venv_check2.strip() == "yes":
422                        pip_cmd = f"~/wactorz/venv/bin/pip install {pkg_str} -q 2>&1"
423                        self._log_remote("Venv created successfully")
424                    else:
425                        pip_cmd = f"python3 -m pip install {pkg_str} --break-system-packages -q 2>&1"
426                        self._log_remote("Venv creation failed — falling back to system pip")
427
428                ok, output = await self._ssh_run(conn, pip_cmd)
429                if ok:
430                    self._log_remote(f"✓ {pkg_str} installed on {host}")
431                    return {"success": True, "host": host, "packages": packages, "output": output[-300:]}
432                else:
433                    self._log_remote(f"✗ Install failed on {host}: {output[-200:]}")
434                    return {"success": False, "host": host, "error": output[-400:]}
435
436        except Exception as e:
437            return {"success": False, "host": host, "error": str(e)}
438
439    async def _node_deploy(self, payload: dict) -> dict:
440        """
441        Full bootstrap of a new Wactorz edge node via SSH.
442
443        Steps:
444          1. Create ~/wactorz/ directory
445          2. Upload remote_runner.py
446          3. Install aiomqtt (the only runtime dependency)
447          4. Kill any existing runner with the same node name
448          5. Start the runner in the background
449          6. Verify it appears online within 15 seconds
450
451        payload keys:
452          host       — IP or hostname
453          user       — SSH username (default: "pi")
454          node_name  — name this node will use (default: "remote-node")
455          broker     — MQTT broker host reachable FROM the Pi (default: "localhost")
456          password   — SSH password (optional)
457          key_path   — path to SSH private key (optional)
458          port       — MQTT broker port (default: 1883)
459        """
460        try:
461            import asyncssh
462        except ImportError:
463            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
464
465        host      = payload.get("host")
466        user      = payload.get("user", "pi")
467        node_name = payload.get("node_name", "remote-node")
468        broker    = payload.get("broker", "localhost")
469        mqtt_port = payload.get("port", 1883)
470
471        if not host:
472            return {"error": "Missing 'host' in payload"}
473
474        # Find remote_runner.py relative to this file
475        import pathlib
476        candidates = [
477            pathlib.Path(__file__).parent.parent / "remote_runner.py",
478            pathlib.Path("remote_runner.py"),
479            pathlib.Path(__file__).parent.parent.parent / "remote_runner.py",
480        ]
481        runner_path = next((p for p in candidates if p.exists()), None)
482        if not runner_path:
483            return {"error": "remote_runner.py not found. Make sure it is in the wactorz root."}
484
485        self._log_remote(f"Deploying node '{node_name}' to {user}@{host}...")
486
487        try:
488            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
489
490                # 1. Create directory
491                await self._ssh_run(conn, "mkdir -p ~/wactorz")
492                self._log_remote(f"[{node_name}] Directory created.")
493
494                # 2. Upload remote_runner.py
495                async with conn.start_sftp_client() as sftp:
496                    await sftp.put(str(runner_path), f"/home/{user}/wactorz/remote_runner.py")
497                self._log_remote(f"[{node_name}] remote_runner.py uploaded.")
498
499                # 3. Create venv if it doesn't exist — avoids all --break-system-packages issues
500                ok, out = await self._ssh_run(
501                    conn, "test -d ~/wactorz/venv && echo exists || python3 -m venv ~/wactorz/venv && echo created"
502                )
503                self._log_remote(f"[{node_name}] venv: {out.strip()}")
504
505                # 4. Install aiomqtt into the venv
506                ok, out = await self._ssh_run(
507                    conn, "~/wactorz/venv/bin/pip install aiomqtt psutil -q 2>&1"
508                )
509                if not ok:
510                    self._log_remote(f"[{node_name}] pip install warning: {out[:150]}")
511                else:
512                    self._log_remote(f"[{node_name}] aiomqtt installed into venv.")
513
514                # 5. Kill any existing instance with this node name
515                await self._ssh_run(
516                    conn,
517                    f"pkill -f 'remote_runner.py.*--name {node_name}' 2>/dev/null; true"
518                )
519
520                # 6. Start runner using venv python in the background
521                cmd = (
522                    f"nohup ~/wactorz/venv/bin/python ~/wactorz/remote_runner.py "
523                    f"--broker {broker} --port {mqtt_port} --name {node_name} "
524                    f"> ~/wactorz/{node_name}.log 2>&1 &"
525                )
526                await self._ssh_run(conn, cmd)
527                self._log_remote(f"[{node_name}] Runner started with venv python.")
528
529            self._log_remote(
530                f"[{node_name}] Deploy complete! Node will appear in /nodes within 15s."
531            )
532            # Persist SSH credentials so future installs don't need them passed again
533            self._persist_node_credentials(
534                node_name = node_name,
535                host      = host,
536                user      = user,
537                password  = payload.get("password"),
538                key_path  = payload.get("key_path"),
539            )
540            return {
541                "success":   True,
542                "node_name": node_name,
543                "host":      host,
544                "broker":    broker,
545                "message":   (
546                    f"Node '{node_name}' deployed to {user}@{host}. "
547                    f"It will appear in /nodes within ~15 seconds."
548                ),
549            }
550
551        except Exception as e:
552            msg = f"Deploy failed for '{node_name}' on {host}: {e}"
553            self._log_remote(msg)
554            return {"success": False, "node_name": node_name, "host": host, "error": str(e)}
555
556    async def _node_run(self, payload: dict) -> dict:
557        """
558        Run an arbitrary shell command on a remote node via SSH.
559
560        payload keys:
561          host     — IP or hostname
562          user     — SSH username (default: "pi")
563          command  — shell command to run
564          password / key_path — auth (optional)
565        """
566        try:
567            import asyncssh
568        except ImportError:
569            return {"error": "asyncssh not installed. Run: pip install asyncssh"}
570
571        host    = payload.get("host")
572        command = payload.get("command", "echo hello")
573        if not host:
574            return {"error": "Missing 'host' in payload"}
575
576        self._log_remote(f"Running on {host}: {command[:80]}")
577        try:
578            async with asyncssh.connect(**self._ssh_kwargs(payload)) as conn:
579                ok, output = await self._ssh_run(conn, command)
580                return {
581                    "success":   ok,
582                    "host":      host,
583                    "command":   command,
584                    "output":    output,
585                    "exit_code": 0 if ok else 1,
586                }
587        except Exception as e:
588            return {"success": False, "host": host, "error": str(e)}

Pre-defined agent that installs Python packages on demand. Uses sys.executable so packages are installed into the active venv.

InstallerAgent(**kwargs)
80    def __init__(self, **kwargs):
81        kwargs.setdefault("name", "installer")
82        super().__init__(**kwargs)
83        self.protected    = True
84        self._install_log: list[dict] = []
protected
async def on_start(self):
89    async def on_start(self):
90        logger.info(f"[{self.name}] Installer ready — using: {sys.executable}")
91        await self._mqtt_publish(
92            f"agents/{self.actor_id}/logs",
93            {"type": "log", "message": f"Installer ready ({sys.executable})", "timestamp": time.time()},
94        )
95        await self.publish_manifest(
96            description="Installs Python packages on demand via pip",
97            capabilities=["pip_install", "package_management"],
98        )

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
100    async def handle_message(self, msg: Message):
101        if msg.type == MessageType.TASK:
102            result = await self._handle_install(msg)
103            # Echo task_id back so caller's future can resolve
104            if isinstance(msg.payload, dict):
105                task_id = msg.payload.get("task") or msg.payload.get("_task_id")
106                if task_id:
107                    result["task"] = task_id
108                    result["_task_id"] = task_id
109            target = msg.reply_to or msg.sender_id
110            if target:
111                await self.send(target, MessageType.RESULT, result)

Handle messages not caught by default handlers.

class CatalogAgent(wactorz.Actor):
class CatalogAgent(Actor):
    """
    Pre-built agent recipe library.

    Holds the recipes returned by ``_build_catalog()`` (a mapping of recipe
    name to recipe dict) and answers TASK messages that ask to ``list`` the
    recipes, show ``info`` about one, or ``spawn`` one. Spawning is done
    through ``self.spawn`` with a trusted ``DynamicAgent``; the actor named
    "main" is only consulted for its LLM provider, persistence directory,
    result futures and spawn registry.
    """

    def __init__(self, **kwargs):
        """Create the catalog actor; its name defaults to ``"catalog"``."""
        kwargs.setdefault("name", "catalog")
        super().__init__(**kwargs)
        self.protected = True
        self._catalog  = _build_catalog()

    # ── Lifecycle ──────────────────────────────────────────────────────────────

    async def on_start(self):
        """Announce the catalog, publish its manifest and inject recipe manifests into main."""
        names = list(self._catalog.keys())
        logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log",
             "message": f"Catalog ready: {', '.join(names)}",
             "timestamp": time.time()},
        )

        await self.publish_manifest(
            description=(
                "Pre-built agent recipe library. "
                "Spawns ready-made agents by name without requiring code. "
                f"Available: {', '.join(names)}"
            ),
            capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
            input_schema={"action": "str — 'spawn' | 'list' | 'info'",
                          "agent":  "str — agent name for spawn/info actions"},
            output_schema={"ok": "bool", "message": "str",
                           "agents": "list", "recipe": "dict"},
        )

        # Inject recipe manifests directly into main's _agent_manifests dict.
        # main may not be registered yet, so poll up to 20 times, 0.5 s apart.
        main = None
        for _ in range(20):
            main = self._registry.find_by_name("main") if self._registry else None
            if main and hasattr(main, "_agent_manifests"):
                break
            await asyncio.sleep(0.5)

        for name, recipe in self._catalog.items():
            manifest = {
                "name":          name,
                "actor_id":      f"catalog.{name}",
                "description":   recipe.get("description", ""),
                "capabilities":  recipe.get("capabilities", []),
                "input_schema":  recipe.get("input_schema",  {}),
                "output_schema": recipe.get("output_schema", {}),
                "publishes":     [],
                "spawnable":     True,
                "catalog":       self.name,
                "timestamp":     time.time(),
            }

            if main and hasattr(main, "_agent_manifests"):
                main._agent_manifests[name] = manifest
                logger.info(f"[{self.name}] Injected manifest for '{name}' into main")
            else:
                logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{name}'")

    def _current_task_description(self) -> str:
        """Return a short status string stating how many recipes are loaded."""
        return f"catalog ({len(self._catalog)} recipes)"

    # ── Message handling ───────────────────────────────────────────────────────

    async def handle_message(self, msg: Message):
        """Process a TASK message and reply with a RESULT; ignore other types.

        The reply goes to ``msg.reply_to`` when set, otherwise to
        ``msg.sender_id``. A task id found in a dict payload (``"task"`` or
        ``"_task_id"``) is echoed back under both keys.
        """
        if msg.type != MessageType.TASK:
            return

        payload = msg.payload if msg.payload is not None else {}
        result  = await self._handle(payload)

        task_id = None
        if isinstance(payload, dict):
            task_id = payload.get("task") or payload.get("_task_id")
        if task_id:
            result["task"]     = task_id
            result["_task_id"] = task_id

        target = msg.reply_to or msg.sender_id
        if target:
            await self.send(target, MessageType.RESULT, result)

    async def _handle(self, payload) -> dict:
        """Dispatch a request payload to the matching action.

        Accepted forms, checked in this order:
          * dict with a truthy ``"action"`` ('spawn' | 'list' | 'info') and an
            optional ``"agent"`` name;
          * dict with a string ``"spawn"`` entry naming the recipe;
          * free text (a str payload, or a dict's ``"text"``/``"message"``/
            ``"query"``) of the form ``list``, ``info <name>``,
            ``spawn <name>`` or just ``<recipe name>``.
        Anything else falls back to listing the catalog.
        """
        if isinstance(payload, dict) and payload.get("action"):
            # str() so a non-string action yields an error reply, not an AttributeError.
            action = str(payload["action"]).lower().strip()
            if action == "list":
                return self._action_list()
            if action == "info":
                return self._action_info(payload.get("agent", ""))
            if action == "spawn":
                return await self._action_spawn(payload.get("agent", ""), payload)
            return {"ok": False, "message": f"Unknown action '{action}'. Use: spawn | list | info"}

        if isinstance(payload, dict) and "spawn" in payload and isinstance(payload["spawn"], str):
            return await self._action_spawn(payload["spawn"], payload)

        if isinstance(payload, str):
            text = payload.strip()
        elif isinstance(payload, dict):
            raw = payload.get("text") or payload.get("message") or payload.get("query") or ""
            # str() guards against non-string values under these keys.
            text = str(raw).strip()
        else:
            text = ""

        if text:
            parts = text.split(None, 1)
            cmd   = parts[0].lower()
            arg   = parts[1].strip() if len(parts) > 1 else ""
            if cmd == "list":
                return self._action_list()
            if cmd == "info":
                return self._action_info(arg)
            if cmd == "spawn":
                return await self._action_spawn(arg, {})
            if cmd in self._catalog:
                return await self._action_spawn(cmd, {})

        return self._action_list()

    # ── Actions ────────────────────────────────────────────────────────────────

    def _action_list(self) -> dict:
        """Return name, description and capabilities of every recipe."""
        agents = [
            {
                "name":         name,
                "description":  recipe.get("description", ""),
                "capabilities": recipe.get("capabilities", []),
            }
            for name, recipe in self._catalog.items()
        ]
        return {
            "ok":      True,
            "message": f"{len(agents)} agent(s) available in catalog",
            "agents":  agents,
        }

    def _action_info(self, name: str) -> dict:
        """Return the recipe for *name* without its ``"code"`` entry, or an error dict."""
        if not name:
            return {"ok": False, "message": "Provide 'agent' name for info action"}
        recipe = self._catalog.get(name)
        if not recipe:
            available = list(self._catalog.keys())
            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}
        safe = {k: v for k, v in recipe.items() if k != "code"}
        return {"ok": True, "message": f"Recipe for '{name}'", "recipe": safe}

    async def _action_spawn(self, name: str, payload: dict) -> dict:
        """Spawn the recipe *name* as a trusted ``DynamicAgent``.

        Args:
            name: Catalog key of the recipe to spawn.
            payload: Originating request payload; not read here, kept so the
                call signature stays stable.

        Returns:
            ``{"ok": bool, "message": str}`` plus ``"agent"`` on a fresh
            spawn. An agent that is already registered under *name* counts
            as success. Any exception raised while installing or spawning is
            logged and reported as ``ok=False`` instead of propagating.
        """
        if not name:
            return {"ok": False, "message": "Provide 'agent' name to spawn"}

        recipe = self._catalog.get(name)
        if not recipe:
            available = list(self._catalog.keys())
            return {"ok": False, "message": f"'{name}' not in catalog. Available: {available}"}

        if not self._registry:
            return {"ok": False, "message": "No registry available — cannot spawn"}

        if self._registry.find_by_name(name):
            return {"ok": True, "message": f"'{name}' is already running"}

        logger.info(f"[{self.name}] Spawning '{name}'...")
        await self._mqtt_publish(
            f"agents/{self.actor_id}/logs",
            {"type": "log", "message": f"Spawning '{name}'...", "timestamp": time.time()},
        )

        try:
            from pathlib import Path

            from .dynamic_agent import DynamicAgent

            install = recipe.get("install", [])
            if install:
                await self._install_dependencies(name, install)

            main = self._registry.find_by_name("main")
            llm_provider = getattr(main, "llm", None) if main else None
            if main:
                # Wrap in Path: the fallback (and possibly the attribute) is a
                # plain str, which has no ``.parent``.
                main_dir = getattr(main, "_persistence_dir", None) or "./state/main"
                persistence_dir = str(Path(main_dir).parent)
            else:
                persistence_dir = "./state"

            actor = await self.spawn(
                DynamicAgent,
                name            = name,
                code            = recipe["code"],
                poll_interval   = float(recipe.get("poll_interval", 3600)),
                description     = recipe.get("description", ""),
                input_schema    = recipe.get("input_schema", {}),
                output_schema   = recipe.get("output_schema", {}),
                llm_provider    = llm_provider,
                persistence_dir = persistence_dir,
                trusted         = True,   # catalog agents are pre-built — skip safety validator
            )

            if not actor:
                return {"ok": False, "message": f"Spawn returned no actor for '{name}'"}

            if main and hasattr(main, "_save_to_spawn_registry"):
                # Mark as trusted so it bypasses safety validator on restore
                save_config = dict(recipe)
                save_config["trusted"] = True
                main._save_to_spawn_registry(save_config)

            done_msg = f"'{name}' spawned and running"
            logger.info(f"[{self.name}] {done_msg}")
            await self._mqtt_publish(
                f"agents/{self.actor_id}/logs",
                {"type": "log", "message": done_msg, "timestamp": time.time()},
            )
            return {"ok": True, "message": done_msg, "agent": name}

        except Exception as e:
            fail_msg = f"Failed to spawn '{name}': {e}"
            # exc_info keeps the traceback that the broad except would otherwise hide.
            logger.error(f"[{self.name}] {fail_msg}", exc_info=True)
            return {"ok": False, "message": fail_msg}

    async def _install_dependencies(self, name: str, packages) -> None:
        """Ask the "installer" actor to install *packages* and wait up to 120 s.

        The wait is best-effort: a timeout or a missing installer is logged
        and the caller proceeds with the spawn regardless.
        """
        installer = self._registry.find_by_name("installer")
        if not installer:
            logger.warning(f"[{self.name}] installer not found — skipping dep install for '{name}'")
            return

        logger.info(f"[{self.name}] Installing deps for '{name}': {packages}")
        import uuid as _uuid
        task_id = f"cat_install_{_uuid.uuid4().hex[:8]}"
        future  = asyncio.get_running_loop().create_future()

        # The future is parked in main's _result_futures mapping; presumably
        # main resolves it when the matching RESULT arrives — TODO confirm.
        main    = self._registry.find_by_name("main")
        pending = getattr(main, "_result_futures", None) if main else None
        if pending is not None:
            pending[task_id] = future

        try:
            await self.send(installer.actor_id, MessageType.TASK, {
                "action":   "install",
                "packages": packages,
                "task":     task_id,
                "_task_id": task_id,
            })
            await asyncio.wait_for(future, timeout=120.0)
        except asyncio.TimeoutError:
            logger.warning(f"[{self.name}] Install timeout for '{name}' — proceeding anyway")
        finally:
            # wait_for cancels the future on timeout; drop it so a late result
            # never reaches a cancelled future and the mapping does not grow.
            if pending is not None:
                pending.pop(task_id, None)

    # ── Public API ─────────────────────────────────────────────────────────────

    def list_recipes(self) -> list[str]:
        """Return the names of all recipes in the catalog."""
        return list(self._catalog.keys())

    def get_recipe(self, name: str) -> Optional[dict]:
        """Return the recipe stored under *name*, or ``None`` if there is none."""
        return self._catalog.get(name)

Pre-built agent recipe library. Spawns any catalog agent on request by delegating to main's spawn pipeline.

CatalogAgent(**kwargs)
264    def __init__(self, **kwargs):
265        kwargs.setdefault("name", "catalog")
266        super().__init__(**kwargs)
267        self.protected = True
268        self._catalog  = _build_catalog()
protected
async def on_start(self):
272    async def on_start(self):
273        names = list(self._catalog.keys())
274        logger.info(f"[{self.name}] Catalog ready — {len(names)} recipe(s): {names}")
275        await self._mqtt_publish(
276            f"agents/{self.actor_id}/logs",
277            {"type": "log",
278             "message": f"Catalog ready: {', '.join(names)}",
279             "timestamp": time.time()},
280        )
281
282        await self.publish_manifest(
283            description=(
284                "Pre-built agent recipe library. "
285                "Spawns ready-made agents by name without requiring code. "
286                f"Available: {', '.join(names)}"
287            ),
288            capabilities=["spawn_catalog_agent", "list_catalog_agents", "agent_catalog"],
289            input_schema={"action": "str — 'spawn' | 'list' | 'info'",
290                          "agent":  "str — agent name for spawn/info actions"},
291            output_schema={"ok": "bool", "message": "str",
292                           "agents": "list", "recipe": "dict"},
293        )
294
295        # Inject recipe manifests directly into main's _agent_manifests dict
296        main = None
297        for _ in range(20):
298            main = self._registry.find_by_name("main") if self._registry else None
299            if main and hasattr(main, "_agent_manifests"):
300                break
301            await asyncio.sleep(0.5)
302
303        for name, recipe in self._catalog.items():
304            manifest = {
305                "name":          name,
306                "actor_id":      f"catalog.{name}",
307                "description":   recipe.get("description", ""),
308                "capabilities":  recipe.get("capabilities", []),
309                "input_schema":  recipe.get("input_schema",  {}),
310                "output_schema": recipe.get("output_schema", {}),
311                "publishes":     [],
312                "spawnable":     True,
313                "catalog":       self.name,
314                "timestamp":     time.time(),
315            }
316
317            if main and hasattr(main, "_agent_manifests"):
318                main._agent_manifests[name] = manifest
319                logger.info(f"[{self.name}] Injected manifest for '{name}' into main")
320            else:
321                logger.warning(f"[{self.name}] main not ready — could not inject manifest for '{name}'")

Called when actor starts. Override for init logic.

async def handle_message(self, msg: Message):
328    async def handle_message(self, msg: Message):
329        if msg.type != MessageType.TASK:
330            return
331
332        payload = msg.payload if msg.payload is not None else {}
333        result  = await self._handle(payload)
334
335        task_id = payload.get("task") or payload.get("_task_id") if isinstance(payload, dict) else None
336        if task_id:
337            result["task"]     = task_id
338            result["_task_id"] = task_id
339
340        target = msg.reply_to or msg.sender_id
341        if target:
342            await self.send(target, MessageType.RESULT, result)

Handle messages not caught by default handlers.

def list_recipes(self) -> list[str]:
496    def list_recipes(self) -> list[str]:
497        return list(self._catalog.keys())
def get_recipe(self, name: str) -> Optional[dict]:
499    def get_recipe(self, name: str) -> Optional[dict]:
500        return self._catalog.get(name)
def HomeAssistantHardwareAgent(*_hw_args, **_hw_kwargs):
def HomeAssistantHardwareAgent(*_hw_args, **_hw_kwargs):  # type: ignore[no-redef]
    """Deprecated factory kept for backward compatibility.

    Emits a ``DeprecationWarning`` attributed to the caller, defaults the
    ``name`` keyword to ``"home-assistant-agent"`` and returns a
    ``HomeAssistantAgent`` built from the given arguments.
    """
    deprecation_note = (
        "HomeAssistantHardwareAgent is deprecated and will be removed in a future release. "
        "Use HomeAssistantAgent instead."
    )
    # stacklevel=2 points the warning at the caller rather than this shim.
    _warnings_hw.warn(deprecation_note, DeprecationWarning, stacklevel=2)

    if "name" not in _hw_kwargs:
        _hw_kwargs["name"] = "home-assistant-agent"

    # Imported at call time rather than at module import.
    from .home_assistant_agent import HomeAssistantAgent as _HA  # noqa: PLC0415
    replacement = _HA(*_hw_args, **_hw_kwargs)
    return replacement