Coverage for little_loops / fsm / validation.py: 89%

282 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-05-22 16:19 -0500

1"""FSM loop validation logic. 

2 

3This module provides validation for FSM loop definitions, ensuring 

4structural correctness and catching common configuration errors. 

5 

6Validation checks: 

7- Initial state exists in states dict 

8- All referenced states exist 

9- At least one terminal state 

10- Evaluator configs have required fields for their type 

11- No conflicting routing (shorthand vs full route) 

12- Numeric fields in valid ranges (max_iterations > 0, backoff >= 0, timeout > 0) 

13""" 

14 

15from __future__ import annotations 

16 

17import logging 

18from collections import deque 

19from dataclasses import dataclass 

20from enum import Enum 

21from pathlib import Path 

22from typing import Any 

23 

24import yaml 

25 

26from little_loops.fsm.fragments import resolve_flow, resolve_fragments, resolve_inheritance 

27from little_loops.fsm.schema import EvaluateConfig, FSMLoop, ParameterSpec, StateConfig 

28 

29logger = logging.getLogger(__name__) 

30 

31 

class ValidationSeverity(Enum):
    """Severity level for validation issues.

    The member values are the lowercase strings that ValidationError.__str__
    upper-cases to build its "[ERROR]" / "[WARNING]" display prefix.
    """

    # Blocking problem: load_and_validate raises ValueError if any issue has this severity.
    ERROR = "error"
    # Advisory problem: load_and_validate logs these and returns them to the caller.
    WARNING = "warning"

37 

38 

@dataclass
class ValidationError:
    """A single finding produced while validating an FSM loop definition.

    Attributes:
        message: Human-readable description of the problem.
        path: Dotted location of the offending element
            (e.g., "states.check.route"); None when no location applies.
        severity: Whether the finding is an error or a warning.
    """

    message: str
    path: str | None = None
    severity: ValidationSeverity = ValidationSeverity.ERROR

    def __str__(self) -> str:
        """Render as "[SEVERITY] path: message", omitting the path part when unset."""
        tag = f"[{self.severity.value.upper()}]"
        # A falsy path (None or empty string) contributes nothing to the output.
        location = f" {self.path}:" if self.path else ""
        return f"{tag}{location} {self.message}"

59 

60 

# Evaluator type to required fields mapping.
# Each key is a recognized evaluator type; its value lists the EvaluateConfig
# attributes that must be non-None for that type. An empty list means the type
# has no mandatory fields. _validate_evaluator also uses the keys as the set of
# valid evaluator types.
EVALUATOR_REQUIRED_FIELDS: dict[str, list[str]] = {
    "exit_code": [],
    "output_numeric": ["operator", "target"],
    "output_json": ["path", "operator", "target"],
    "output_contains": ["pattern"],
    "convergence": ["target"],
    "diff_stall": [],
    "llm_structured": [],
    "mcp_result": [],
    "harbor_scorer": [],
}

# Valid comparison operators accepted in an evaluator's 'operator' field.
VALID_OPERATORS = {"eq", "ne", "lt", "le", "gt", "ge"}

# All top-level keys recognized by FSMLoop.from_dict().
# load_and_validate reports any other top-level key as a WARNING (not an error).
KNOWN_TOP_LEVEL_KEYS: frozenset[str] = frozenset(
    {
        "name",
        "description",
        "initial",
        "states",
        "context",
        "parameters",
        "scope",
        "max_iterations",
        "max_edge_revisits",
        "backoff",
        "timeout",
        "default_timeout",
        "maintain",
        "llm",
        "on_handoff",
        "input_key",
        "config",
        "category",
        "labels",
        "commands",
        "targets",
        "import",
        "fragments",
        "from",
        "flow",
        "state_defs",
    }
)

# Valid parameter types for the 'parameters:' block.
# _validate_parameters rejects any parameter whose type is not listed here.
VALID_PARAMETER_TYPES: frozenset[str] = frozenset(
    {"string", "integer", "number", "boolean", "enum", "path"}
)

113 

114 

def _validate_evaluator(state_name: str, evaluate: EvaluateConfig) -> list[ValidationError]:
    """Check one state's evaluator block against the rules for its type.

    Args:
        state_name: Name of the state that owns the evaluator
        evaluate: Evaluator configuration under inspection

    Returns:
        Every validation error detected (empty when the evaluator is valid)
    """
    base_path = f"states.{state_name}.evaluate"
    kind = evaluate.type
    known_kinds = set(EVALUATOR_REQUIRED_FIELDS.keys())

    # An unrecognized type has no field contract to check: report it and stop.
    if kind not in known_kinds:
        return [
            ValidationError(
                message=f"Unknown evaluator type '{kind}'. "
                f"Must be one of: {', '.join(sorted(known_kinds))}",
                path=base_path,
            )
        ]

    # Every field the type mandates must be present (i.e. not None).
    found: list[ValidationError] = [
        ValidationError(
            message=f"Evaluator type '{kind}' requires '{required_name}' field",
            path=base_path,
        )
        for required_name in EVALUATOR_REQUIRED_FIELDS.get(kind, [])
        if getattr(evaluate, required_name, None) is None
    ]

    # The operator is checked for every evaluator type whenever it is supplied.
    op = evaluate.operator
    if not (op is None or op in VALID_OPERATORS):
        found.append(
            ValidationError(
                message=f"Invalid operator '{op}'. "
                f"Must be one of: {', '.join(sorted(VALID_OPERATORS))}",
                path=f"{base_path}.operator",
            )
        )

    # Type-specific checks; at most one branch applies.
    if kind == "convergence":
        if evaluate.direction not in ("minimize", "maximize"):
            found.append(
                ValidationError(
                    message=f"Invalid direction '{evaluate.direction}'. "
                    "Must be 'minimize' or 'maximize'",
                    path=f"{base_path}.direction",
                )
            )
        # Only numeric tolerances are range-checked; None and interpolation
        # strings fail the isinstance test and are left alone.
        tol = evaluate.tolerance
        if isinstance(tol, (int, float)) and tol < 0:
            found.append(
                ValidationError(
                    message="Tolerance cannot be negative",
                    path=f"{base_path}.tolerance",
                )
            )
    elif kind == "llm_structured":
        confidence = evaluate.min_confidence
        if confidence < 0 or confidence > 1:
            found.append(
                ValidationError(
                    message="min_confidence must be between 0 and 1",
                    path=f"{base_path}.min_confidence",
                )
            )
    elif kind == "diff_stall":
        if evaluate.max_stall < 1:
            found.append(
                ValidationError(
                    message="max_stall must be >= 1",
                    path=f"{base_path}.max_stall",
                )
            )

    return found

206 

207 

def _validate_parameters(fsm: FSMLoop) -> list[ValidationError]:
    """Check every entry of the loop's top-level parameters: block.

    Args:
        fsm: The FSM loop whose parameters are inspected

    Returns:
        Every validation error detected (empty when all parameters are valid)
    """
    found: list[ValidationError] = []
    allowed_types = ", ".join(sorted(VALID_PARAMETER_TYPES))

    for name, spec in fsm.parameters.items():
        # Gather this parameter's problems as plain messages first; they all
        # share the same path.
        problems: list[str] = []

        if spec.type not in VALID_PARAMETER_TYPES:
            problems.append(
                f"Unknown parameter type '{spec.type}'. Must be one of: {allowed_types}"
            )
        if spec.type == "enum" and not spec.values:
            problems.append("Parameter type 'enum' requires a 'values' list")
        if spec.required and spec.default is not None:
            problems.append(
                "Parameter cannot be both 'required: true' and have a 'default' value"
            )

        location = f"parameters.{name}"
        found.extend(ValidationError(message=text, path=location) for text in problems)

    return found

250 

251 

252def _check_param_type(value: Any, spec: ParameterSpec) -> str | None: 

253 """Return an error message if value does not match spec.type, else None.""" 

254 if spec.type == "string" and not isinstance(value, str): 

255 return f"expected string, got {type(value).__name__}" 

256 if spec.type == "integer" and not isinstance(value, int): 

257 return f"expected integer, got {type(value).__name__}" 

258 if spec.type == "number" and not isinstance(value, (int, float)): 

259 return f"expected number, got {type(value).__name__}" 

260 if spec.type == "boolean" and not isinstance(value, bool): 

261 return f"expected boolean, got {type(value).__name__}" 

262 if spec.type == "enum" and spec.values and value not in spec.values: 

263 return f"expected one of {spec.values!r}, got {value!r}" 

264 return None 

265 

266 

def _validate_with_bindings(fsm: FSMLoop, loop_dir: Path) -> list[ValidationError]:
    """Validate with: bindings against child loop parameter contracts.

    Called from load_and_validate (not validate_fsm) because resolving child loops
    requires file-system access via the loop directory path.

    For every state that has both 'loop' and a non-empty 'with', the child loop
    is loaded and three things are checked, in this order: keys the child does
    not declare, required child parameters that are not bound, and literal
    values whose type does not match the declared parameter type.

    Validation is best-effort: if the child loop cannot be resolved, loaded or
    validated, the state is skipped (the reason is logged at DEBUG level).

    Args:
        fsm: The parent FSM loop
        loop_dir: Directory to resolve child loop paths from

    Returns:
        List of validation errors found
    """
    errors: list[ValidationError] = []

    for state_name, state in fsm.states.items():
        if state.loop is None or not state.with_:
            continue

        # Try to resolve and load the child loop; skip if unavailable.
        try:
            # Imported lazily, inside the try, so an import failure is treated
            # like any other "child unavailable" condition.
            from little_loops.cli.loop._helpers import resolve_loop_path

            loop_path = resolve_loop_path(state.loop, loop_dir)
            child_fsm, _ = load_and_validate(loop_path)
        except Exception as exc:
            # Deliberately non-fatal, but leave a trace so a mistyped loop
            # reference does not silently disable cross-validation.
            logger.debug(
                "Skipping 'with' validation for state '%s': could not load loop '%s' (%s)",
                state_name,
                state.loop,
                exc,
            )
            continue

        declared = child_fsm.parameters
        if not declared:
            continue  # Child has no declared contract — nothing to cross-validate

        path = f"states.{state_name}"

        # Unknown with: keys (not declared by child)
        for key in state.with_:
            if key not in declared:
                errors.append(
                    ValidationError(
                        message=(
                            f"'with.{key}' is not a declared parameter of loop '{state.loop}'. "
                            f"Declared: {', '.join(sorted(declared))}"
                        ),
                        path=f"{path}.with.{key}",
                    )
                )

        # Required parameters not bound
        for param_name, param_spec in declared.items():
            if param_spec.required and param_name not in state.with_:
                errors.append(
                    ValidationError(
                        message=(
                            f"Required parameter '{param_name}' of loop '{state.loop}' "
                            f"is not bound in 'with'"
                        ),
                        path=f"{path}.with",
                    )
                )

        # Statically-detectable type mismatches (skip interpolation strings)
        for param_name, value in state.with_.items():
            if param_name not in declared:
                continue  # Already reported above as an unknown key
            if isinstance(value, str) and "${" in value:
                continue  # Value is only known at run time
            type_error = _check_param_type(value, declared[param_name])
            if type_error:
                errors.append(
                    ValidationError(
                        message=f"Parameter '{param_name}': {type_error}",
                        path=f"{path}.with.{param_name}",
                    )
                )

    return errors

342 

343 

def _validate_state_action(state_name: str, state: StateConfig) -> list[ValidationError]:
    """Check a state's action-related fields for invalid combinations.

    Covers 'params', the 'loop'/'action' exclusion, 'with' usage, the
    type=learning requirements and the 'with'/'context_passthrough' exclusion.

    Args:
        state_name: Name of the state being checked
        state: Configuration of that state

    Returns:
        Every validation error detected (empty when the state is valid)
    """
    base = f"states.{state_name}"
    found: list[ValidationError] = []

    def report(message: str, suffix: str = "") -> None:
        """Record an error located at the state path plus an optional suffix."""
        found.append(ValidationError(message=message, path=f"{base}{suffix}"))

    # 'params' belongs to mcp_tool states only.
    if state.params and state.action_type != "mcp_tool":
        report("'params' field is only valid when action_type is 'mcp_tool'", ".params")

    sub_loop = state.loop

    # A state either delegates to a sub-loop or runs an action, never both.
    if sub_loop is not None and state.action is not None:
        report(
            "'loop' and 'action' are mutually exclusive — "
            "a sub-loop state cannot also have an action"
        )

    # Bindings are meaningless without a sub-loop to receive them.
    if state.with_ and sub_loop is None:
        report("'with' is only valid when 'loop' is set", ".with")

    # FEAT-1283: learning states. These checks run only when a learning config
    # object is present on the state.
    if state.type == "learning" and state.learning is not None:
        learning = state.learning
        if not learning.targets:
            report("type=learning requires non-empty 'learning.targets'", ".learning.targets")
        if learning.max_retries < 0:
            report(
                f"learning.max_retries must be >= 0, got {learning.max_retries}",
                ".learning.max_retries",
            )
        if state.on_yes is None:
            report("type=learning requires 'on_yes' (target for all-proven)", ".on_yes")
        if not (state.on_blocked is not None or state.on_no is not None):
            report(
                "type=learning requires 'on_blocked' or 'on_no' "
                "(target for refuted / retries_exhausted)"
            )

    # Explicit bindings and bulk passthrough cannot be combined.
    if state.with_ and state.context_passthrough:
        report(
            "'with' and 'context_passthrough' are mutually exclusive — "
            "use 'with' for explicit parameter bindings or 'context_passthrough' "
            "for legacy bulk passthrough, not both"
        )

    return found

435 

436 

def _validate_state_routing(state_name: str, state: StateConfig) -> list[ValidationError]:
    """Validate state routing configuration.

    Checks, in order:
    - conflicting routing definitions (shorthand vs full route) — WARNING
    - a state with no transition of any kind
    - retry field pairing and range (max_retries / on_retry_exhausted)
    - rate-limit retry pairing and ranges, including the long-wait ladder
    - throttle bounds (positive integers, strictly increasing pairwise)

    Args:
        state_name: Name of the state to validate
        state: The state configuration to validate

    Returns:
        List of validation errors/warnings found
    """
    errors: list[ValidationError] = []
    path = f"states.{state_name}"

    def report(message: str, severity: ValidationSeverity = ValidationSeverity.ERROR) -> None:
        """Record a finding located at this state's path."""
        errors.append(ValidationError(message=message, path=path, severity=severity))

    has_shorthand = (
        state.on_yes is not None
        or state.on_no is not None
        or state.on_error is not None
        or state.on_partial is not None
        or state.on_blocked is not None
        or bool(state.extra_routes)
    )
    has_route = state.route is not None

    # Warn about conflicting definitions
    if has_shorthand and has_route:
        report(
            "Both shorthand routing (on_yes/on_no/on_error) "
            "and full route table defined. Route table will take precedence.",
            ValidationSeverity.WARNING,
        )

    # Check for no valid transition definition: a state needs routing, 'next',
    # a terminal flag or a sub-loop.
    if not (
        has_shorthand
        or has_route
        or state.next is not None
        or state.terminal
        or state.loop is not None
    ):
        report(
            "State has no transition defined. Add routing, 'next', "
            "or mark as 'terminal: true'"
        )

    # Retry limits and their exhaustion targets must be set together, and the
    # limit must be at least 1. The rate-limit pair mirrors the plain retry pair.
    for limit_name, target_name in (
        ("max_retries", "on_retry_exhausted"),
        ("max_rate_limit_retries", "on_rate_limit_exhausted"),
    ):
        limit = getattr(state, limit_name)
        target = getattr(state, target_name)
        if limit is not None and target is None:
            report(f"'{limit_name}' requires '{target_name}' to also be set")
        if target is not None and limit is None:
            report(f"'{target_name}' requires '{limit_name}' to also be set")
        if limit is not None and limit < 1:
            report(f"'{limit_name}' must be >= 1, got {limit}")

    # Optional rate-limit timing fields must be >= 1 when present.
    for seconds_name in ("rate_limit_backoff_base_seconds", "rate_limit_max_wait_seconds"):
        seconds = getattr(state, seconds_name)
        if seconds is not None and seconds < 1:
            report(f"'{seconds_name}' must be >= 1, got {seconds}")

    ladder = state.rate_limit_long_wait_ladder
    if ladder is not None:
        if len(ladder) == 0:
            report("'rate_limit_long_wait_ladder' must be non-empty if specified")
        else:
            for idx, value in enumerate(ladder):
                if not isinstance(value, int) or value < 1:
                    report(
                        f"'rate_limit_long_wait_ladder[{idx}]' must be a "
                        f"positive integer, got {value!r}"
                    )

    # Validate throttle config when present
    if state.throttle is not None:
        t = state.throttle
        limits = {
            "normal_max": t.normal_max,
            "warn_max": t.warn_max,
            "hard_max": t.hard_max,
        }
        for field_name, val in limits.items():
            if val is not None and (not isinstance(val, int) or val < 1):
                report(f"'throttle.{field_name}' must be a positive integer, got {val!r}")

        # Enforce ordering pairwise (normal < warn, warn < hard) whenever both
        # members of a pair are set. Only numeric operands are compared: a
        # non-numeric bound (e.g. a string) was already reported above, and
        # comparing it here would raise TypeError instead of returning errors.
        for lower_name, upper_name in (("normal_max", "warn_max"), ("warn_max", "hard_max")):
            lower = limits[lower_name]
            upper = limits[upper_name]
            if (
                isinstance(lower, (int, float))
                and isinstance(upper, (int, float))
                and lower >= upper
            ):
                report(
                    f"'throttle.{lower_name}' ({lower}) must be less than "
                    f"'throttle.{upper_name}' ({upper})"
                )

    return errors

615 

616 

def _validate_targets(fsm: FSMLoop) -> list[ValidationError]:
    """Validate top-level targets[] entries (ENH-1552).

    Produces one error for each target whose file value is set but does not
    end with the .yaml extension. Targets without a file value are accepted.
    """
    return [
        ValidationError(
            message=(f"targets[{index}].file must be a .yaml file, got '{entry.file}'"),
            path=f"targets[{index}].file",
        )
        for index, entry in enumerate(fsm.targets)
        if entry.file is not None and not entry.file.endswith(".yaml")
    ]

633 

634 

def _validate_failure_terminal_action(fsm: FSMLoop) -> list[ValidationError]:
    """Warn about failure-named terminal states that nothing diagnostic leads to.

    A terminal state named failed, error or aborted is expected to have at
    least one other state that references it and carries an action or a
    sub-loop, so that some diagnostic output precedes termination. Without
    one the failure is silent — the executor calls _finish("terminal")
    before any action on the terminal itself can execute.

    Findings are WARNING (not ERROR) so that existing loops with bare
    failure terminals continue to load, and test_terminal_only_state_valid
    (which filters by ERROR) passes without modification.
    """
    failure_names: frozenset[str] = frozenset({"failed", "error", "aborted"})
    findings: list[ValidationError] = []

    for terminal_name in fsm.get_terminal_states() & failure_names:
        # Look for any *other* state that routes here and does real work.
        diagnosed = any(
            other.action is not None or other.loop is not None
            for other_name, other in fsm.states.items()
            if other_name != terminal_name and terminal_name in other.get_referenced_states()
        )
        if diagnosed:
            continue

        findings.append(
            ValidationError(
                message=(
                    f"Failure-named terminal state '{terminal_name}' has no predecessor "
                    "state with a diagnostic action. Add a non-terminal diagnostic "
                    "state (e.g. 'diagnose') with an action or sub-loop that routes "
                    f"to '{terminal_name}'."
                ),
                path=f"states.{terminal_name}",
                severity=ValidationSeverity.WARNING,
            )
        )

    return findings

679 

680 

def validate_fsm(fsm: FSMLoop) -> list[ValidationError]:
    """Run every structural check on an FSM loop and collect the findings.

    Checks performed, in order:
    - A top-level description is present (warning only)
    - The parameters and targets blocks are valid
    - The initial state exists
    - At least one terminal state exists
    - Per state: referenced states exist; action, evaluator and routing
      configurations are valid
    - Numeric fields are in range (max_iterations > 0, max_edge_revisits > 0,
      backoff >= 0, timeout > 0, llm.max_tokens > 0, llm.timeout > 0)
    - Every state is reachable from the initial state (warning only)
    - Failure-named terminals have a diagnostic predecessor (warning only)

    Args:
        fsm: The FSM loop to validate

    Returns:
        All findings, errors and warnings alike (empty if nothing was found)
    """
    issues: list[ValidationError] = []
    known_states = fsm.get_all_state_names()

    def report(
        message: str,
        path: str,
        severity: ValidationSeverity = ValidationSeverity.ERROR,
    ) -> None:
        """Append one finding to the result list."""
        issues.append(ValidationError(message=message, path=path, severity=severity))

    # The description is optional for FSM execution but required for
    # goal-alignment skills (debug-loop-run, audit-loop-run) and for
    # ll-loop show --json to surface intent text, hence a warning.
    if not fsm.description:
        report(
            "No 'description' field defined. Add a top-level description: key.",
            "<root>",
            ValidationSeverity.WARNING,
        )

    # Top-level blocks: parameters, then targets (ENH-1552).
    issues.extend(_validate_parameters(fsm))
    issues.extend(_validate_targets(fsm))

    # The entry point must be a defined state.
    if fsm.initial not in known_states:
        report(f"Initial state '{fsm.initial}' not found in states", "initial")

    # The loop needs somewhere to stop.
    if not fsm.get_terminal_states():
        report(
            "No terminal state defined. At least one state must have 'terminal: true'",
            "states",
        )

    # Per-state checks.
    for name, config in fsm.states.items():
        # Every routing target must exist; "$current" is a special retry token.
        issues.extend(
            ValidationError(
                message=f"References unknown state '{target}'",
                path=f"states.{name}",
            )
            for target in config.get_referenced_states()
            if target != "$current" and target not in known_states
        )

        issues.extend(_validate_state_action(name, config))

        if config.evaluate is not None:
            issues.extend(_validate_evaluator(name, config.evaluate))

        issues.extend(_validate_state_routing(name, config))

    # Numeric ranges. backoff and timeout are optional (None is accepted).
    if fsm.max_iterations <= 0:
        report(f"max_iterations must be > 0, got {fsm.max_iterations}", "max_iterations")
    if fsm.max_edge_revisits <= 0:
        report(
            f"max_edge_revisits must be > 0, got {fsm.max_edge_revisits}",
            "max_edge_revisits",
        )
    if fsm.backoff is not None and fsm.backoff < 0:
        report(f"backoff must be >= 0, got {fsm.backoff}", "backoff")
    if fsm.timeout is not None and fsm.timeout <= 0:
        report(f"timeout must be > 0, got {fsm.timeout}", "timeout")
    if fsm.llm.max_tokens <= 0:
        report(f"llm.max_tokens must be > 0, got {fsm.llm.max_tokens}", "llm.max_tokens")
    if fsm.llm.timeout <= 0:
        report(f"llm.timeout must be > 0, got {fsm.llm.timeout}", "llm.timeout")

    # States that can never be entered are suspicious but not fatal.
    for orphan in known_states - _find_reachable_states(fsm):
        report(
            "State is not reachable from initial state",
            f"states.{orphan}",
            ValidationSeverity.WARNING,
        )

    issues.extend(_validate_failure_terminal_action(fsm))

    return issues

821 

822 

823def _find_reachable_states(fsm: FSMLoop) -> set[str]: 

824 """Find all states reachable from the initial state. 

825 

826 Uses breadth-first search to find all reachable states. 

827 

828 Args: 

829 fsm: The FSM loop to analyze 

830 

831 Returns: 

832 Set of reachable state names 

833 """ 

834 reachable: set[str] = set() 

835 to_visit: deque[str] = deque([fsm.initial]) 

836 

837 while to_visit: 

838 current = to_visit.popleft() 

839 if current in reachable or current not in fsm.states: 

840 continue 

841 

842 reachable.add(current) 

843 state = fsm.states[current] 

844 refs = state.get_referenced_states() 

845 

846 for ref in refs: 

847 if ref != "$current" and ref not in reachable: 

848 to_visit.append(ref) 

849 

850 return reachable 

851 

852 

def load_and_validate(path: Path) -> tuple[FSMLoop, list[ValidationError]]:
    """Load YAML file and validate FSM structure.

    Pipeline: read YAML, resolve `from:` inheritance, expand `flow:`, check
    required and unknown top-level keys, resolve fragments, parse into an
    FSMLoop, then run structural validation and with: binding validation.
    Warnings are logged and returned; errors abort with ValueError.

    Args:
        path: Path to the YAML file to load

    Returns:
        Tuple of (validated FSMLoop instance, list of WARNING-severity ValidationErrors)

    Raises:
        FileNotFoundError: If the file doesn't exist
        yaml.YAMLError: If the file is not valid YAML
        ValueError: If the document is not a mapping, required fields are
            missing, or validation fails (contains error details)
    """
    if not path.exists():
        raise FileNotFoundError(f"FSM file not found: {path}")

    # Decode explicitly as UTF-8: the platform's default text encoding is not
    # guaranteed to be UTF-8 and would mis-read non-ASCII loop definitions.
    with open(path, encoding="utf-8") as f:
        data: dict[str, Any] = yaml.safe_load(f)

    # An empty file (None) or a top-level list/scalar is not a loop definition.
    if not isinstance(data, dict):
        raise ValueError(f"FSM file must contain a YAML mapping, got {type(data)}")

    # Resolve `from:` inheritance before any further checks, so a child loop
    # can omit fields its parent provides (including `initial`/`states`) and
    # so a parent's `import:`/`fragments:` blocks survive into the merged
    # result for the subsequent `resolve_fragments` pass.
    data = resolve_inheritance(data, path.parent)

    # Expand flow: linear shorthand into states: before required-fields check
    data = resolve_flow(data)

    # Check required fields before parsing
    missing = [field for field in ("name", "initial") if field not in data]
    if "states" not in data:
        missing.append("states (or flow)")

    if missing:
        raise ValueError(f"FSM file missing required fields: {', '.join(missing)}")

    # Check for unknown top-level keys before parsing (reported as a warning)
    unknown_key_warnings: list[ValidationError] = []
    unknown = set(data.keys()) - KNOWN_TOP_LEVEL_KEYS
    if unknown:
        unknown_key_warnings.append(
            ValidationError(
                path="<root>",
                message=f"Unknown top-level keys: {', '.join(sorted(unknown))}",
                severity=ValidationSeverity.WARNING,
            )
        )

    # Resolve fragment libraries before parsing into dataclass
    data = resolve_fragments(data, path.parent)

    # Parse into dataclass
    fsm = FSMLoop.from_dict(data)

    # Validate
    errors = validate_fsm(fsm)

    # Validate with: bindings against child loop parameters (requires file-system access)
    errors.extend(_validate_with_bindings(fsm, path.parent))

    # Filter to errors only (not warnings) for raising
    error_list = [e for e in errors if e.severity == ValidationSeverity.ERROR]

    if error_list:
        error_messages = "\n ".join(str(e) for e in error_list)
        raise ValueError(f"FSM validation failed:\n {error_messages}")

    # Collect all warnings (unknown-key warnings + structural warnings)
    struct_warnings = [e for e in errors if e.severity == ValidationSeverity.WARNING]
    all_warnings = unknown_key_warnings + struct_warnings
    for warning in all_warnings:
        logger.warning(str(warning))

    return fsm, all_warnings