Coverage for src/duelboard/calculators/mle.py: 100%

76 statements  

« prev     ^ index     » next       coverage.py v7.10.3, created at 2025-08-14 19:18 +0900

1"""Maximum Likelihood Estimation Elo calculator.""" 

2 

3import math 

4 

5import numpy as np 

6import pandas as pd 

7from sklearn.linear_model import LogisticRegression 

8from tqdm import tqdm 

9 

10from duelboard.models import Battle, BattleOutcome, EloRating 

11from duelboard.types import RatingsDict 

12 

13from .base import EloCalculator 

14 

15 

class MLEEloCalculator(EloCalculator):
    """Maximum Likelihood Estimation Elo calculator using logistic regression.

    Every decisive (non-tie) battle becomes one row of a design matrix with
    ``+ln(base)`` in player A's column and ``-ln(base)`` in player B's column.
    A logistic regression is fitted against "player A won", and each fitted
    coefficient is mapped to a rating as ``scale * coef + initial_rating``.
    """

    def __init__(
        self,
        scale: float = 400,
        base: float = 10,
        initial_rating: float = 1000,
        *,
        fit_intercept: bool = False,
        max_iter: int = 1000,
        random_state: int | None = None,
    ) -> None:
        """Initialize the MLE Elo calculator.

        Args:
            scale: Scale parameter for Elo calculation
            base: Base for exponential calculation
            initial_rating: Initial rating for new players
            fit_intercept: Whether to fit intercept in logistic regression
            max_iter: Maximum iterations for logistic regression
            random_state: Seed forwarded to the logistic regression and, when
                set, used to make the bootstrap resampling reproducible
        """
        # We don't use k_factor for MLE, but keep it for consistency
        # NOTE(review): scale/base/initial_rating are read back later as
        # self.scale/self.base/self.initial_rating - presumably stored by
        # EloCalculator.__init__; confirm against the base class.
        super().__init__(k_factor=1, scale=scale, base=base, initial_rating=initial_rating)
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.random_state = random_state

    @staticmethod
    def _decisive_battles(battles: list[Battle] | pd.DataFrame) -> pd.DataFrame:
        """Return the non-tie battles as a player_a / player_b / winner DataFrame."""
        if isinstance(battles, pd.DataFrame):
            # MLE only works with binary outcomes, so drop every row whose
            # winner mentions "tie". Boolean indexing returns a new frame, so
            # the caller's DataFrame is never modified.
            is_tie = battles["winner"].str.contains("tie", case=False, na=False)
            return battles[~is_tie]

        return pd.DataFrame(
            [
                {
                    "player_a": battle.player_a,
                    "player_b": battle.player_b,
                    "winner": battle.outcome.value,
                }
                for battle in battles
                if battle.outcome not in (BattleOutcome.TIE, BattleOutcome.TIE_BOTHBAD)
            ],
        )

    @staticmethod
    def _count_battles(df: pd.DataFrame) -> dict[str, int]:
        """Return how many rows of ``df`` each player takes part in."""
        # A row counts once per participant. The player_b side is skipped when
        # both sides name the same player so such a row is not counted twice.
        distinct_opponents = df.loc[df["player_a"] != df["player_b"], "player_b"]
        appearances = pd.concat([df["player_a"], distinct_opponents]).value_counts()
        # Plain ints keep NumPy scalar types out of the returned ratings.
        return {player: int(count) for player, count in appearances.items()}

    def calculate(self, battles: list[Battle] | pd.DataFrame) -> RatingsDict:
        """Calculate Elo ratings using Maximum Likelihood Estimation.

        Ties are discarded before fitting. Any remaining row whose winner is
        not ``"player_a"`` is treated as a loss for player A.

        Args:
            battles: List of Battle objects or DataFrame with ``player_a``,
                ``player_b`` and ``winner`` columns

        Returns:
            Dictionary mapping player names to EloRating objects; empty when
            no decisive battles remain
        """
        df = self._decisive_battles(battles)
        if df.empty:
            return {}

        # Column order of the design matrix follows first appearance.
        players = pd.concat([df["player_a"], df["player_b"]]).unique()
        player_index = pd.Index(players)
        n_battles = len(df)
        rows = np.arange(n_battles)

        # Design matrix: +ln(base) for player A, -ln(base) for player B.
        log_base = math.log(self.base)
        x = np.zeros((n_battles, len(players)))
        x[rows, player_index.get_indexer(df["player_a"])] = log_base
        x[rows, player_index.get_indexer(df["player_b"])] = -log_base

        # Outcome vector: 1.0 where player A won, 0.0 otherwise.
        y = (df["winner"] == "player_a").to_numpy(dtype=float)

        lr = LogisticRegression(
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            random_state=self.random_state,
        )
        lr.fit(x, y)

        # Convert coefficients to Elo scores
        elo_scores = self.scale * lr.coef_[0] + self.initial_rating
        battle_counts = self._count_battles(df)

        return {
            player: EloRating(
                player=player,
                rating=elo_scores[i],
                battles=battle_counts.get(player, 0),
            )
            for i, player in enumerate(players)
        }

    def calculate_with_bootstrap(
        self,
        battles: list[Battle] | pd.DataFrame,
        n_bootstrap: int = 500,
        confidence_level: float = 0.95,
    ) -> RatingsDict:
        """Calculate MLE Elo ratings with bootstrap confidence intervals.

        The decisive battles are resampled with replacement ``n_bootstrap``
        times and re-fitted each time. The reported rating is the median of
        the per-sample ratings and the interval is the matching pair of
        quantiles. A player missing from a resample contributes no value for
        that resample.

        Args:
            battles: List of Battle objects or DataFrame with battles
            n_bootstrap: Number of bootstrap samples
            confidence_level: Confidence level for intervals

        Returns:
            Dictionary of EloRating objects with confidence intervals. If no
            bootstrap sample could be fitted, the plain ``calculate`` result
            (without intervals) is returned instead.
        """
        df = self._decisive_battles(battles)
        if df.empty:
            return {}

        # One generator shared by all iterations: a seeded run is reproducible
        # while every resample still differs. With no seed, pandas keeps
        # drawing from the global NumPy random state.
        sampler = None if self.random_state is None else np.random.RandomState(self.random_state)

        bootstrap_results = []
        for _ in tqdm(range(n_bootstrap), desc="MLE Bootstrap"):
            sampled_df = df.sample(n=len(df), replace=True, random_state=sampler).reset_index(drop=True)

            try:
                sample_ratings = self.calculate(sampled_df)
            except Exception:
                # Deliberate best-effort: a resample can be unfittable (for
                # example when it contains only one outcome class), so it is
                # skipped rather than aborting the whole bootstrap.
                continue

            bootstrap_results.append({player: rating.rating for player, rating in sample_ratings.items()})

        if not bootstrap_results:
            # Fallback to regular calculation if bootstrap fails
            return self.calculate(df)

        bootstrap_df = pd.DataFrame(bootstrap_results)
        alpha = 1 - confidence_level
        lower_quantile = alpha / 2
        upper_quantile = 1 - alpha / 2

        # Battle counts come from the full data set, not from the resamples.
        battle_counts = self._count_battles(df)

        results = {}
        for player in bootstrap_df.columns:
            samples = bootstrap_df[player]
            results[player] = EloRating(
                player=player,
                rating=samples.median(),
                confidence_interval=(
                    samples.quantile(lower_quantile),
                    samples.quantile(upper_quantile),
                ),
                battles=battle_counts.get(player, 0),
            )

        return results