Coverage for agentos/prompts/optimizer.py: 58%

80 statements  

« prev     ^ index     » next       coverage.py v7.14.3, created at 2026-07-02 09:59 +0800

1""" 

2Prompt Optimizer — DSPy-inspired automatic prompt improvement via 

3iterative refinement, few-shot bootstrapping, and multi-strategy optimization. 

4""" 

5 

6import random 

7from dataclasses import dataclass, field 

8from enum import Enum, auto 

9from typing import Any, Callable, Dict, List, Optional 

10 

11 

class OptimizationStrategy(str, Enum):
    """Names of the prompt-optimization approaches a run can select.

    Members mix in ``str``, so each one compares equal to its plain string
    value and can be looked up from that string, e.g.
    ``OptimizationStrategy("mipro")``.
    """

    # Few-shot bootstrapping.
    BOOTSTRAP_FEWSHOT = "bootstrap_fewshot"

    # Multi-prompt instruction proposal.
    MIPRO = "mipro"

    GRADIENT_FREE = "gradient_free"

    ENSEMBLE = "ensemble"

    CHAIN_OF_THOUGHT = "chain_of_thought"

20 

21 

@dataclass
class OptimizerConfig:
    """Settings that control a single prompt-optimization run.

    Every field has a default, so ``OptimizerConfig()`` is a usable
    configuration on its own.
    """

    # --- search shape -----------------------------------------------------
    # Which family of seed templates the optimizer draws candidates from.
    strategy: OptimizationStrategy = OptimizationStrategy.BOOTSTRAP_FEWSHOT
    # Upper bound on refinement rounds.
    max_iterations: int = 10
    # Number of prompt variants generated and scored in each round.
    candidates_per_iteration: int = 4

    # --- evaluation -------------------------------------------------------
    # NOTE(review): eval_samples, target_metric and temperature_range are not
    # read by the optimizer code in this file; presumably they are consumed
    # by the caller's scoring function -- confirm.
    eval_samples: int = 20
    target_metric: str = "accuracy"
    # The run stops as soon as the best score reaches this value.
    target_threshold: float = 0.90
    temperature_range: tuple[float, float] = (0.1, 0.9)

    # --- bookkeeping / stopping -------------------------------------------
    # Number of top candidates returned in the result history.
    keep_top_k: int = 3
    # Consecutive non-improving rounds tolerated before stopping early.
    early_stop_patience: int = 3
    # Seed handed to the random number generator for reproducible runs.
    seed: int = 42

36 

37 

@dataclass
class PromptCandidate:
    """One prompt variant together with its evaluation bookkeeping.

    Only ``id`` and ``text`` are required; the remaining fields start at
    neutral defaults and are filled in as the candidate is scored.
    """

    # Identifier of this variant.
    id: str
    # The full prompt text that gets scored.
    text: str
    # Score assigned by the scoring function; 0.0 until evaluated.
    score: float = 0.0
    # Optional per-metric breakdown; each instance gets its own dict.
    metrics: dict[str, float] = field(default_factory=dict)
    # Iteration in which the variant was produced (0 for the base prompt).
    generation: int = 0
    # Identifier of the candidate this one was derived from, "" if none.
    parent_id: str = ""

48 

49 

@dataclass
class OptimizationResult:
    """Outcome of an optimization run, whether it converged or ran out of budget."""

    # Text of the highest-scoring prompt found.
    best_prompt: str
    # Score of ``best_prompt``.
    best_score: float
    # Number of refinement rounds the run reports.
    iterations: int
    # Number of prompt candidates the run reports as evaluated.
    candidates_evaluated: int
    # Strategy the run was configured with.
    strategy: OptimizationStrategy
    # Candidates retained from the run; each result owns its own list.
    history: list[PromptCandidate] = field(default_factory=list)
    # Free-form extra information; each result owns its own dict.
    metadata: dict[str, Any] = field(default_factory=dict)

61 

62 

class PromptOptimizer:
    """Iteratively refines prompts using a pluggable scoring function.

    Each round wraps the current best prompt in randomly chosen templates for
    the configured strategy (occasionally perturbed), scores every variant
    with the caller's ``score_fn`` and keeps the highest-scoring prompt seen
    so far.  The search ends when the target threshold is reached, when
    ``early_stop_patience`` consecutive rounds bring no improvement, or when
    ``max_iterations`` rounds have run.

    Usage::

        def score(prompt: str) -> float:
            # run your LLM eval and return metric
            return measure(prompt)

        opt = PromptOptimizer(config)
        result = opt.optimize(base_prompt, score_fn=score)
        print(result.best_prompt)
    """

    # Templates per strategy.  ``{base}`` receives the prompt being refined
    # and ``{examples}`` the formatted few-shot examples.  Strategies without
    # an entry here fall back to the BOOTSTRAP_FEWSHOT templates.
    SEED_TEMPLATES = {
        OptimizationStrategy.BOOTSTRAP_FEWSHOT: [
            "{base}\n\nHere are some examples:\n{examples}",
            "Task: {base}\n\nIllustrative examples:\n{examples}",
            "{base}\n\nDemonstrations:\n{examples}",
        ],
        OptimizationStrategy.CHAIN_OF_THOUGHT: [
            "{base}\n\nLet's think step by step.",
            "{base}\n\nBreak this down logically:",
            "Solve step-by-step:\n{base}",
        ],
        OptimizationStrategy.ENSEMBLE: [
            "Consider multiple perspectives:\n{base}",
            "Review from different angles:\n{base}",
            "Analyze comprehensively:\n{base}",
        ],
    }

    # Small text rewrites applied by ``_perturb``.  Built once here instead
    # of on every call; the order is significant because a seeded random
    # index selects the entry.
    _PERTURBATIONS = (
        lambda t: t.replace(".", ". Please be thorough."),
        lambda t: "Carefully: " + t,
        lambda t: t + "\nBe precise and concise.",
        lambda t: t.replace(":", ":\n"),
        lambda t: t.replace("the ", "the relevant "),
    )

    def __init__(self, config: OptimizerConfig | None = None):
        """Store the configuration and seed the random number generator.

        Args:
            config: Run settings; a default ``OptimizerConfig`` is used when
                omitted.
        """
        self.config = config if config is not None else OptimizerConfig()
        # NOTE: this seeds the process-wide ``random`` module, so creating an
        # optimizer also resets the random sequence seen by any other code
        # using ``random``.  Kept as-is because callers (for example a
        # sampling ``score_fn``) may rely on it for reproducibility.
        random.seed(self.config.seed)

    def optimize(
        self,
        base_prompt: str,
        score_fn: Callable[[str], float],
        few_shot_examples: list[str] | None = None,
    ) -> OptimizationResult:
        """Run optimization and return the best prompt found.

        Args:
            base_prompt: Starting prompt; it is scored first and serves as the
                baseline the candidates have to beat.
            score_fn: Callable mapping a prompt text to a score, where higher
                is better.  Called once for the base prompt and once for
                every generated candidate.
            few_shot_examples: Optional example strings substituted into
                templates that contain an ``{examples}`` placeholder.

        Returns:
            An ``OptimizationResult`` holding the best prompt and score, the
            number of rounds that actually ran, the total number of prompts
            scored (base prompt included) and the top ``keep_top_k``
            candidates.
        """
        best = PromptCandidate(
            id="base",
            text=base_prompt,
            score=score_fn(base_prompt),
            generation=0,
        )
        history = [best]
        # Counted separately from ``history``, which is trimmed every round
        # and therefore cannot tell how many prompts were scored.
        evaluated = 1
        # Tracked explicitly: the loop variable is never bound when
        # ``max_iterations`` is zero or negative.
        iterations_run = 0
        stale_rounds = 0
        keep = self.config.keep_top_k

        for iteration in range(1, self.config.max_iterations + 1):
            candidates = self._generate_candidates(
                best.text, iteration, few_shot_examples, parent_id=best.id
            )
            if not candidates:
                # ``candidates_per_iteration`` <= 0: no round can ever
                # produce anything, so stop instead of calling max() on an
                # empty list.
                break
            iterations_run = iteration

            for candidate in candidates:
                candidate.score = score_fn(candidate.text)
                evaluated += 1
                history.append(candidate)

            # Select best from this iteration; only a strictly higher score
            # replaces the current best.
            iteration_best = max(candidates, key=lambda c: c.score)
            if iteration_best.score > best.score:
                best = iteration_best
                stale_rounds = 0
            else:
                stale_rounds += 1

            # Keep a bounded, score-sorted window across all generations.
            history.sort(key=lambda c: c.score, reverse=True)
            history = history[:keep * 2]

            if best.score >= self.config.target_threshold:
                break
            if stale_rounds >= self.config.early_stop_patience:
                break

        return OptimizationResult(
            best_prompt=best.text,
            best_score=best.score,
            iterations=iterations_run,
            candidates_evaluated=evaluated,
            strategy=self.config.strategy,
            history=history[:keep],
        )

    def _generate_candidates(
        self,
        base: str,
        generation: int,
        examples: list[str] | None,
        parent_id: str | None = None,
    ) -> list[PromptCandidate]:
        """Build one round of unscored prompt variants derived from ``base``.

        Args:
            base: Prompt text substituted for ``{base}`` in the templates.
            generation: Round number, used in candidate ids and to decide
                whether perturbation is allowed (only after round 1).
            examples: Optional few-shot examples for ``{examples}``.
            parent_id: Id of the candidate ``base`` came from.  When omitted
                the previous naming heuristic is used (``"base"`` for round
                1, otherwise ``gen{generation-1}_0``).

        Returns:
            ``candidates_per_iteration`` candidates with default scores.
        """
        templates = self.SEED_TEMPLATES.get(
            self.config.strategy,
            self.SEED_TEMPLATES[OptimizationStrategy.BOOTSTRAP_FEWSHOT],
        )
        # Identical for every candidate of the round, so format it once.
        examples_block = self._format_examples(examples) if examples else ""
        if parent_id is None:
            parent_id = "base" if generation == 1 else f"gen{generation-1}_0"

        candidates: list[PromptCandidate] = []
        for i in range(self.config.candidates_per_iteration):
            text = random.choice(templates).format(
                base=base,
                examples=examples_block,
            )
            # Add small perturbations.  The random draw comes first in the
            # condition on purpose: one value is consumed per candidate in
            # every round, which keeps the seeded sequence unchanged.
            if random.random() < 0.3 and generation > 1:
                text = self._perturb(text)

            candidates.append(PromptCandidate(
                id=f"gen{generation}_{i}",
                text=text,
                generation=generation,
                parent_id=parent_id,
            ))

        return candidates

    def _format_examples(self, examples: list[str], max_examples: int = 5) -> str:
        """Render the first ``max_examples`` examples as a ``- `` bullet list."""
        return "\n".join(f"- {e}" for e in examples[:max_examples])

    def _perturb(self, text: str) -> str:
        """Apply one randomly chosen minor rewrite to ``text``."""
        return random.choice(self._PERTURBATIONS)(text)