Coverage for little_loops / fsm / validation.py: 89%
282 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-22 16:19 -0500
« prev ^ index » next coverage.py v7.12.0, created at 2026-05-22 16:19 -0500
"""FSM loop validation logic.

This module provides validation for FSM loop definitions, ensuring
structural correctness and catching common configuration errors.

Validation checks:
- Initial state exists in states dict
- All referenced states exist
- At least one terminal state
- Evaluator configs have required fields for their type
- No conflicting routing (shorthand vs full route)
- Numeric fields in valid ranges (max_iterations > 0, backoff >= 0, timeout > 0)
"""
15from __future__ import annotations
17import logging
18from collections import deque
19from dataclasses import dataclass
20from enum import Enum
21from pathlib import Path
22from typing import Any
24import yaml
26from little_loops.fsm.fragments import resolve_flow, resolve_fragments, resolve_inheritance
27from little_loops.fsm.schema import EvaluateConfig, FSMLoop, ParameterSpec, StateConfig
29logger = logging.getLogger(__name__)
class ValidationSeverity(Enum):
    """How serious a validation finding is."""

    # Default severity of a ValidationError.
    ERROR = "error"
    # Advisory finding.
    WARNING = "warning"
@dataclass
class ValidationError:
    """A single finding produced while validating an FSM loop.

    Attributes:
        message: Human-readable description of the problem.
        path: Location of the offending element (e.g., "states.check.route"),
            or None when no location applies.
        severity: Whether the finding is an error or a warning.
    """

    message: str
    path: str | None = None
    severity: ValidationSeverity = ValidationSeverity.ERROR

    def __str__(self) -> str:
        """Render as ``[SEVERITY] path: message``; the path part is omitted when falsy."""
        location = f"{self.path}: " if self.path else ""
        return f"[{self.severity.value.upper()}] {location}{self.message}"
# Fields that must be non-None for each recognized evaluator type.
# The keys double as the set of valid evaluator type names.
EVALUATOR_REQUIRED_FIELDS: dict[str, list[str]] = {
    "exit_code": [],
    "output_numeric": ["operator", "target"],
    "output_json": ["path", "operator", "target"],
    "output_contains": ["pattern"],
    "convergence": ["target"],
    "diff_stall": [],
    "llm_structured": [],
    "mcp_result": [],
    "harbor_scorer": [],
}

# Comparison operators accepted in an evaluator's 'operator' field.
VALID_OPERATORS = {"eq", "ne", "lt", "le", "gt", "ge"}

# Top-level keys recognized by FSMLoop.from_dict(); any other key in a loop
# file is reported as an unknown-key warning.
KNOWN_TOP_LEVEL_KEYS: frozenset[str] = frozenset(
    (
        # identity and structure
        "name",
        "description",
        "initial",
        "states",
        "context",
        "parameters",
        "scope",
        # limits and timing
        "max_iterations",
        "max_edge_revisits",
        "backoff",
        "timeout",
        "default_timeout",
        # behaviour and metadata
        "maintain",
        "llm",
        "on_handoff",
        "input_key",
        "config",
        "category",
        "labels",
        "commands",
        "targets",
        # composition (fragments, inheritance, flow shorthand)
        "import",
        "fragments",
        "from",
        "flow",
        "state_defs",
    )
)

# Type names accepted for entries in the 'parameters:' block.
VALID_PARAMETER_TYPES: frozenset[str] = frozenset(
    ("string", "integer", "number", "boolean", "enum", "path")
)
def _validate_evaluator(state_name: str, evaluate: EvaluateConfig) -> list[ValidationError]:
    """Check an evaluator config against the rules for its declared type.

    Args:
        state_name: Name of the state that owns the evaluator (used in paths)
        evaluate: The evaluator configuration to check

    Returns:
        List of validation errors found (empty when the evaluator is valid)
    """
    base = f"states.{state_name}.evaluate"
    found: list[ValidationError] = []

    def report(message: str, where: str = base) -> None:
        found.append(ValidationError(message=message, path=where))

    kind = evaluate.type

    # An unrecognized type has no required-field list, so stop right here.
    if kind not in EVALUATOR_REQUIRED_FIELDS:
        known = ", ".join(sorted(EVALUATOR_REQUIRED_FIELDS))
        report(f"Unknown evaluator type '{kind}'. Must be one of: {known}")
        return found

    # Every field listed for this type must be present and non-None.
    for required_field in EVALUATOR_REQUIRED_FIELDS[kind]:
        if getattr(evaluate, required_field, None) is None:
            report(f"Evaluator type '{kind}' requires '{required_field}' field")

    # The operator is checked for any type that sets one.
    operator = evaluate.operator
    if operator is not None and operator not in VALID_OPERATORS:
        allowed = ", ".join(sorted(VALID_OPERATORS))
        report(
            f"Invalid operator '{operator}'. Must be one of: {allowed}",
            f"{base}.operator",
        )

    # Type-specific checks; the type is a single value, so at most one applies.
    if kind == "convergence":
        if evaluate.direction not in ("minimize", "maximize"):
            report(
                f"Invalid direction '{evaluate.direction}'. Must be 'minimize' or 'maximize'",
                f"{base}.direction",
            )
        # Non-numeric tolerances (e.g. interpolation strings) are left alone.
        tolerance = evaluate.tolerance
        if isinstance(tolerance, (int, float)) and tolerance < 0:
            report("Tolerance cannot be negative", f"{base}.tolerance")
    elif kind == "llm_structured":
        confidence = evaluate.min_confidence
        if confidence < 0 or confidence > 1:
            report("min_confidence must be between 0 and 1", f"{base}.min_confidence")
    elif kind == "diff_stall":
        if evaluate.max_stall < 1:
            report("max_stall must be >= 1", f"{base}.max_stall")

    return found
208def _validate_parameters(fsm: FSMLoop) -> list[ValidationError]:
209 """Validate the loop's top-level parameters: block.
211 Args:
212 fsm: The FSM loop to validate
214 Returns:
215 List of validation errors found
216 """
217 errors: list[ValidationError] = []
219 for param_name, param_spec in fsm.parameters.items():
220 path = f"parameters.{param_name}"
222 if param_spec.type not in VALID_PARAMETER_TYPES:
223 errors.append(
224 ValidationError(
225 message=(
226 f"Unknown parameter type '{param_spec.type}'. "
227 f"Must be one of: {', '.join(sorted(VALID_PARAMETER_TYPES))}"
228 ),
229 path=path,
230 )
231 )
233 if param_spec.type == "enum" and not param_spec.values:
234 errors.append(
235 ValidationError(
236 message="Parameter type 'enum' requires a 'values' list",
237 path=path,
238 )
239 )
241 if param_spec.required and param_spec.default is not None:
242 errors.append(
243 ValidationError(
244 message="Parameter cannot be both 'required: true' and have a 'default' value",
245 path=path,
246 )
247 )
249 return errors
252def _check_param_type(value: Any, spec: ParameterSpec) -> str | None:
253 """Return an error message if value does not match spec.type, else None."""
254 if spec.type == "string" and not isinstance(value, str):
255 return f"expected string, got {type(value).__name__}"
256 if spec.type == "integer" and not isinstance(value, int):
257 return f"expected integer, got {type(value).__name__}"
258 if spec.type == "number" and not isinstance(value, (int, float)):
259 return f"expected number, got {type(value).__name__}"
260 if spec.type == "boolean" and not isinstance(value, bool):
261 return f"expected boolean, got {type(value).__name__}"
262 if spec.type == "enum" and spec.values and value not in spec.values:
263 return f"expected one of {spec.values!r}, got {value!r}"
264 return None
267def _validate_with_bindings(fsm: FSMLoop, loop_dir: Path) -> list[ValidationError]:
268 """Validate with: bindings against child loop parameter contracts.
270 Called from load_and_validate (not validate_fsm) because resolving child loops
271 requires file-system access via the loop directory path.
273 Args:
274 fsm: The parent FSM loop
275 loop_dir: Directory to resolve child loop paths from
277 Returns:
278 List of validation errors found
279 """
280 errors: list[ValidationError] = []
282 for state_name, state in fsm.states.items():
283 if state.loop is None or not state.with_:
284 continue
286 # Try to resolve and load the child loop; skip if unavailable
287 try:
288 from little_loops.cli.loop._helpers import resolve_loop_path
290 loop_path = resolve_loop_path(state.loop, loop_dir)
291 child_fsm, _ = load_and_validate(loop_path)
292 except Exception:
293 continue
295 if not child_fsm.parameters:
296 continue # Child has no declared contract — nothing to cross-validate
298 path = f"states.{state_name}"
300 # Unknown with: keys (not declared by child)
301 for key in state.with_:
302 if key not in child_fsm.parameters:
303 errors.append(
304 ValidationError(
305 message=(
306 f"'with.{key}' is not a declared parameter of loop '{state.loop}'. "
307 f"Declared: {', '.join(sorted(child_fsm.parameters))}"
308 ),
309 path=f"{path}.with.{key}",
310 )
311 )
313 # Required parameters not bound
314 for param_name, param_spec in child_fsm.parameters.items():
315 if param_spec.required and param_name not in state.with_:
316 errors.append(
317 ValidationError(
318 message=(
319 f"Required parameter '{param_name}' of loop '{state.loop}' "
320 f"is not bound in 'with'"
321 ),
322 path=f"{path}.with",
323 )
324 )
326 # Statically-detectable type mismatches (skip interpolation strings)
327 for param_name, value in state.with_.items():
328 if param_name not in child_fsm.parameters:
329 continue
330 if isinstance(value, str) and "${" in value:
331 continue
332 type_error = _check_param_type(value, child_fsm.parameters[param_name])
333 if type_error:
334 errors.append(
335 ValidationError(
336 message=f"Parameter '{param_name}': {type_error}",
337 path=f"{path}.with.{param_name}",
338 )
339 )
341 return errors
344def _validate_state_action(state_name: str, state: StateConfig) -> list[ValidationError]:
345 """Validate state action configuration.
347 Args:
348 state_name: Name of the state to validate
349 state: The state configuration to validate
351 Returns:
352 List of validation errors found
353 """
354 errors: list[ValidationError] = []
355 path = f"states.{state_name}"
357 # params field is only valid for mcp_tool states
358 if state.params and state.action_type != "mcp_tool":
359 errors.append(
360 ValidationError(
361 message="'params' field is only valid when action_type is 'mcp_tool'",
362 path=f"{path}.params",
363 )
364 )
366 # loop and action are mutually exclusive
367 if state.loop is not None and state.action is not None:
368 errors.append(
369 ValidationError(
370 message="'loop' and 'action' are mutually exclusive — "
371 "a sub-loop state cannot also have an action",
372 path=f"{path}",
373 )
374 )
376 # with: requires loop: to be set
377 if state.with_ and state.loop is None:
378 errors.append(
379 ValidationError(
380 message="'with' is only valid when 'loop' is set",
381 path=f"{path}.with",
382 )
383 )
385 # FEAT-1283: type=learning requires a populated LearningConfig
386 if state.type == "learning" and state.learning is not None:
387 if not state.learning.targets:
388 errors.append(
389 ValidationError(
390 message="type=learning requires non-empty 'learning.targets'",
391 path=f"{path}.learning.targets",
392 )
393 )
394 if state.learning.max_retries < 0:
395 errors.append(
396 ValidationError(
397 message=(
398 f"learning.max_retries must be >= 0, got {state.learning.max_retries}"
399 ),
400 path=f"{path}.learning.max_retries",
401 )
402 )
403 if state.on_yes is None:
404 errors.append(
405 ValidationError(
406 message="type=learning requires 'on_yes' (target for all-proven)",
407 path=f"{path}.on_yes",
408 )
409 )
410 if state.on_blocked is None and state.on_no is None:
411 errors.append(
412 ValidationError(
413 message=(
414 "type=learning requires 'on_blocked' or 'on_no' "
415 "(target for refuted / retries_exhausted)"
416 ),
417 path=f"{path}",
418 )
419 )
421 # with: and context_passthrough are mutually exclusive
422 if state.with_ and state.context_passthrough:
423 errors.append(
424 ValidationError(
425 message=(
426 "'with' and 'context_passthrough' are mutually exclusive — "
427 "use 'with' for explicit parameter bindings or 'context_passthrough' "
428 "for legacy bulk passthrough, not both"
429 ),
430 path=f"{path}",
431 )
432 )
434 return errors
437def _validate_state_routing(state_name: str, state: StateConfig) -> list[ValidationError]:
438 """Validate state routing configuration.
440 Checks for conflicting routing definitions (shorthand vs full route).
442 Args:
443 state_name: Name of the state to validate
444 state: The state configuration to validate
446 Returns:
447 List of validation errors/warnings found
448 """
449 errors: list[ValidationError] = []
450 path = f"states.{state_name}"
452 has_shorthand = (
453 state.on_yes is not None
454 or state.on_no is not None
455 or state.on_error is not None
456 or state.on_partial is not None
457 or state.on_blocked is not None
458 or bool(state.extra_routes)
459 )
460 has_route = state.route is not None
462 # Warn about conflicting definitions
463 if has_shorthand and has_route:
464 errors.append(
465 ValidationError(
466 message="Both shorthand routing (on_yes/on_no/on_error) "
467 "and full route table defined. Route table will take precedence.",
468 path=path,
469 severity=ValidationSeverity.WARNING,
470 )
471 )
473 # Check for no valid transition definition
474 has_next = state.next is not None
475 has_terminal = state.terminal
476 has_loop = state.loop is not None
478 if not has_shorthand and not has_route and not has_next and not has_terminal and not has_loop:
479 errors.append(
480 ValidationError(
481 message="State has no transition defined. Add routing, 'next', "
482 "or mark as 'terminal: true'",
483 path=path,
484 )
485 )
487 # Validate retry field pairing: max_retries requires on_retry_exhausted and vice versa
488 if state.max_retries is not None and state.on_retry_exhausted is None:
489 errors.append(
490 ValidationError(
491 message="'max_retries' requires 'on_retry_exhausted' to also be set",
492 path=path,
493 )
494 )
495 if state.on_retry_exhausted is not None and state.max_retries is None:
496 errors.append(
497 ValidationError(
498 message="'on_retry_exhausted' requires 'max_retries' to also be set",
499 path=path,
500 )
501 )
502 if state.max_retries is not None and state.max_retries < 1:
503 errors.append(
504 ValidationError(
505 message=f"'max_retries' must be >= 1, got {state.max_retries}",
506 path=path,
507 )
508 )
510 # Validate rate-limit retry field pairing (mirrors max_retries/on_retry_exhausted)
511 if state.max_rate_limit_retries is not None and state.on_rate_limit_exhausted is None:
512 errors.append(
513 ValidationError(
514 message="'max_rate_limit_retries' requires 'on_rate_limit_exhausted' to also be set",
515 path=path,
516 )
517 )
518 if state.on_rate_limit_exhausted is not None and state.max_rate_limit_retries is None:
519 errors.append(
520 ValidationError(
521 message="'on_rate_limit_exhausted' requires 'max_rate_limit_retries' to also be set",
522 path=path,
523 )
524 )
525 if state.max_rate_limit_retries is not None and state.max_rate_limit_retries < 1:
526 errors.append(
527 ValidationError(
528 message=f"'max_rate_limit_retries' must be >= 1, got {state.max_rate_limit_retries}",
529 path=path,
530 )
531 )
532 if (
533 state.rate_limit_backoff_base_seconds is not None
534 and state.rate_limit_backoff_base_seconds < 1
535 ):
536 errors.append(
537 ValidationError(
538 message=(
539 f"'rate_limit_backoff_base_seconds' must be >= 1, "
540 f"got {state.rate_limit_backoff_base_seconds}"
541 ),
542 path=path,
543 )
544 )
545 if state.rate_limit_max_wait_seconds is not None and state.rate_limit_max_wait_seconds < 1:
546 errors.append(
547 ValidationError(
548 message=(
549 f"'rate_limit_max_wait_seconds' must be >= 1, "
550 f"got {state.rate_limit_max_wait_seconds}"
551 ),
552 path=path,
553 )
554 )
555 if state.rate_limit_long_wait_ladder is not None:
556 if len(state.rate_limit_long_wait_ladder) == 0:
557 errors.append(
558 ValidationError(
559 message="'rate_limit_long_wait_ladder' must be non-empty if specified",
560 path=path,
561 )
562 )
563 else:
564 for idx, value in enumerate(state.rate_limit_long_wait_ladder):
565 if not isinstance(value, int) or value < 1:
566 errors.append(
567 ValidationError(
568 message=(
569 f"'rate_limit_long_wait_ladder[{idx}]' must be a "
570 f"positive integer, got {value!r}"
571 ),
572 path=path,
573 )
574 )
576 # Validate throttle config when present
577 if state.throttle is not None:
578 t = state.throttle
579 fields = {
580 "normal_max": t.normal_max,
581 "warn_max": t.warn_max,
582 "hard_max": t.hard_max,
583 }
584 for field_name, val in fields.items():
585 if val is not None and (not isinstance(val, int) or val < 1):
586 errors.append(
587 ValidationError(
588 message=f"'throttle.{field_name}' must be a positive integer, got {val!r}",
589 path=path,
590 )
591 )
592 # Enforce ordering when all three are set
593 if t.normal_max is not None and t.warn_max is not None and t.normal_max >= t.warn_max:
594 errors.append(
595 ValidationError(
596 message=(
597 f"'throttle.normal_max' ({t.normal_max}) must be less than "
598 f"'throttle.warn_max' ({t.warn_max})"
599 ),
600 path=path,
601 )
602 )
603 if t.warn_max is not None and t.hard_max is not None and t.warn_max >= t.hard_max:
604 errors.append(
605 ValidationError(
606 message=(
607 f"'throttle.warn_max' ({t.warn_max}) must be less than "
608 f"'throttle.hard_max' ({t.hard_max})"
609 ),
610 path=path,
611 )
612 )
614 return errors
617def _validate_targets(fsm: FSMLoop) -> list[ValidationError]:
618 """Validate top-level targets[] entries (ENH-1552).
620 Rejects any targets[].states[] entry whose sibling file: value does not
621 end with a .yaml extension.
622 """
623 errors: list[ValidationError] = []
624 for i, target in enumerate(fsm.targets):
625 if target.file is not None and not target.file.endswith(".yaml"):
626 errors.append(
627 ValidationError(
628 message=(f"targets[{i}].file must be a .yaml file, got '{target.file}'"),
629 path=f"targets[{i}].file",
630 )
631 )
632 return errors
635def _validate_failure_terminal_action(fsm: FSMLoop) -> list[ValidationError]:
636 """Warn when a failure-named terminal state has no diagnostic predecessor.
638 Failure terminals (failed, error, aborted) should have at least one
639 predecessor state with an action or sub-loop that provides diagnostic
640 output before termination. Otherwise the failure is silent — the
641 executor calls _finish("terminal") before any action on the terminal
642 itself can execute.
644 Severity is WARNING (not ERROR) so that existing loops with bare
645 failure terminals continue to load, and test_terminal_only_state_valid
646 (which filters by ERROR) passes without modification.
647 """
648 FAILURE_TERMINAL_NAMES: frozenset[str] = frozenset({"failed", "error", "aborted"})
649 errors: list[ValidationError] = []
651 terminal_states = fsm.get_terminal_states()
652 failure_terminals = terminal_states & FAILURE_TERMINAL_NAMES
654 for ft_name in failure_terminals:
655 has_diagnostic_predecessor = False
656 for state_name, state in fsm.states.items():
657 if state_name == ft_name:
658 continue
659 if ft_name in state.get_referenced_states():
660 if state.action is not None or state.loop is not None:
661 has_diagnostic_predecessor = True
662 break
664 if not has_diagnostic_predecessor:
665 errors.append(
666 ValidationError(
667 message=(
668 f"Failure-named terminal state '{ft_name}' has no predecessor "
669 "state with a diagnostic action. Add a non-terminal diagnostic "
670 "state (e.g. 'diagnose') with an action or sub-loop that routes "
671 f"to '{ft_name}'."
672 ),
673 path=f"states.{ft_name}",
674 severity=ValidationSeverity.WARNING,
675 )
676 )
678 return errors
def validate_fsm(fsm: FSMLoop) -> list[ValidationError]:
    """Validate FSM structure and return list of errors.

    Performs comprehensive validation:
    - Missing top-level description (warning)
    - Parameters and targets blocks
    - Initial state exists
    - At least one terminal state
    - All referenced states exist
    - Action, evaluator and routing configurations are valid
    - Numeric fields are in valid ranges (max_iterations > 0,
      max_edge_revisits > 0, backoff >= 0, timeout > 0, llm.max_tokens > 0,
      llm.timeout > 0)
    - Unreachable states and undiagnosed failure terminals (warnings)

    Args:
        fsm: The FSM loop to validate

    Returns:
        List of validation errors and warnings (empty if valid)
    """
    errors: list[ValidationError] = []
    defined_states = fsm.get_all_state_names()

    def fail(message: str, path: str) -> None:
        errors.append(ValidationError(message=message, path=path))

    # Warn when no top-level description: field is set. The field is optional
    # for FSM execution but required for goal-alignment skills (debug-loop-run,
    # audit-loop-run) and for ll-loop show --json to surface intent text.
    if not fsm.description:
        errors.append(
            ValidationError(
                path="<root>",
                message=("No 'description' field defined. Add a top-level description: key."),
                severity=ValidationSeverity.WARNING,
            )
        )

    # Validate parameters block
    errors.extend(_validate_parameters(fsm))

    # Validate targets block (ENH-1552)
    errors.extend(_validate_targets(fsm))

    # Check initial state exists
    if fsm.initial not in defined_states:
        fail(f"Initial state '{fsm.initial}' not found in states", "initial")

    # Check at least one terminal state
    if not fsm.get_terminal_states():
        fail(
            "No terminal state defined. At least one state must have 'terminal: true'",
            "states",
        )

    # Validate each state
    for state_name, state in fsm.states.items():
        # Check all referenced states exist
        for ref in state.get_referenced_states():
            # $current is a special token for retry
            if ref != "$current" and ref not in defined_states:
                fail(f"References unknown state '{ref}'", f"states.{state_name}")

        # Validate action configuration
        errors.extend(_validate_state_action(state_name, state))

        # Validate evaluator if present
        if state.evaluate is not None:
            errors.extend(_validate_evaluator(state_name, state.evaluate))

        # Validate routing configuration
        errors.extend(_validate_state_routing(state_name, state))

    # Check numeric field ranges
    if fsm.max_iterations <= 0:
        fail(f"max_iterations must be > 0, got {fsm.max_iterations}", "max_iterations")
    if fsm.max_edge_revisits <= 0:
        fail(
            f"max_edge_revisits must be > 0, got {fsm.max_edge_revisits}",
            "max_edge_revisits",
        )
    if fsm.backoff is not None and fsm.backoff < 0:
        fail(f"backoff must be >= 0, got {fsm.backoff}", "backoff")
    if fsm.timeout is not None and fsm.timeout <= 0:
        fail(f"timeout must be > 0, got {fsm.timeout}", "timeout")
    if fsm.llm.max_tokens <= 0:
        fail(f"llm.max_tokens must be > 0, got {fsm.llm.max_tokens}", "llm.max_tokens")
    if fsm.llm.timeout <= 0:
        fail(f"llm.timeout must be > 0, got {fsm.llm.timeout}", "llm.timeout")

    # Check for unreachable states (warning only). Sorted so the warnings are
    # emitted in a stable order rather than in set-iteration order.
    unreachable = defined_states - _find_reachable_states(fsm)
    for state_name in sorted(unreachable):
        errors.append(
            ValidationError(
                message="State is not reachable from initial state",
                path=f"states.{state_name}",
                severity=ValidationSeverity.WARNING,
            )
        )

    errors.extend(_validate_failure_terminal_action(fsm))

    return errors
823def _find_reachable_states(fsm: FSMLoop) -> set[str]:
824 """Find all states reachable from the initial state.
826 Uses breadth-first search to find all reachable states.
828 Args:
829 fsm: The FSM loop to analyze
831 Returns:
832 Set of reachable state names
833 """
834 reachable: set[str] = set()
835 to_visit: deque[str] = deque([fsm.initial])
837 while to_visit:
838 current = to_visit.popleft()
839 if current in reachable or current not in fsm.states:
840 continue
842 reachable.add(current)
843 state = fsm.states[current]
844 refs = state.get_referenced_states()
846 for ref in refs:
847 if ref != "$current" and ref not in reachable:
848 to_visit.append(ref)
850 return reachable
def load_and_validate(path: Path) -> tuple[FSMLoop, list[ValidationError]]:
    """Load YAML file and validate FSM structure.

    Steps, in order: read the YAML mapping, resolve ``from:`` inheritance,
    expand the ``flow:`` shorthand, check required fields, collect
    unknown-key warnings, resolve fragments, parse into an FSMLoop, then run
    the structural and ``with:``-binding validation. Warnings are logged and
    returned; errors abort the load.

    Args:
        path: Path to the YAML file to load

    Returns:
        Tuple of (validated FSMLoop instance, list of WARNING-severity ValidationErrors)

    Raises:
        FileNotFoundError: If the file doesn't exist
        yaml.YAMLError: If the file is not valid YAML
        ValueError: If the file is not a mapping, lacks required fields, or
            validation fails (contains error details)
    """
    if not path.exists():
        raise FileNotFoundError(f"FSM file not found: {path}")

    # Read as UTF-8 explicitly so parsing does not depend on the platform's
    # locale encoding.
    with open(path, encoding="utf-8") as f:
        data: dict[str, Any] = yaml.safe_load(f)

    if not isinstance(data, dict):
        raise ValueError(f"FSM file must contain a YAML mapping, got {type(data)}")

    # Resolve `from:` inheritance before any further checks, so a child loop
    # can omit fields its parent provides (including `initial`/`states`) and
    # so a parent's `import:`/`fragments:` blocks survive into the merged
    # result for the subsequent `resolve_fragments` pass.
    data = resolve_inheritance(data, path.parent)

    # Expand flow: linear shorthand into states: before required-fields check
    data = resolve_flow(data)

    # Check required fields before parsing
    missing = [field for field in ("name", "initial") if field not in data]
    if "states" not in data:
        missing.append("states (or flow)")

    if missing:
        raise ValueError(f"FSM file missing required fields: {', '.join(missing)}")

    # Check for unknown top-level keys before parsing
    unknown_key_warnings: list[ValidationError] = []
    unknown = set(data.keys()) - KNOWN_TOP_LEVEL_KEYS
    if unknown:
        # YAML keys need not be strings (e.g. `1:`); stringify before sorting
        # and joining so an odd key produces a warning instead of a TypeError.
        unknown_names = sorted(str(key) for key in unknown)
        unknown_key_warnings.append(
            ValidationError(
                path="<root>",
                message=f"Unknown top-level keys: {', '.join(unknown_names)}",
                severity=ValidationSeverity.WARNING,
            )
        )

    # Resolve fragment libraries before parsing into dataclass
    data = resolve_fragments(data, path.parent)

    # Parse into dataclass
    fsm = FSMLoop.from_dict(data)

    # Validate
    errors = validate_fsm(fsm)

    # Validate with: bindings against child loop parameters (requires file-system access)
    errors.extend(_validate_with_bindings(fsm, path.parent))

    # Filter to errors only (not warnings) for raising
    error_list = [e for e in errors if e.severity == ValidationSeverity.ERROR]

    if error_list:
        error_messages = "\n ".join(str(e) for e in error_list)
        raise ValueError(f"FSM validation failed:\n {error_messages}")

    # Collect all warnings (unknown-key warnings + structural warnings)
    struct_warnings = [e for e in errors if e.severity == ValidationSeverity.WARNING]
    all_warnings = unknown_key_warnings + struct_warnings
    for warning in all_warnings:
        logger.warning("%s", warning)

    return fsm, all_warnings