Coverage for src/duelboard/calculators/mle.py: 100%
76 statements
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 19:18 +0900
« prev ^ index » next coverage.py v7.10.3, created at 2025-08-14 19:18 +0900
1"""Maximum Likelihood Estimation Elo calculator."""
3import math
5import numpy as np
6import pandas as pd
7from sklearn.linear_model import LogisticRegression
8from tqdm import tqdm
10from duelboard.models import Battle, BattleOutcome, EloRating
11from duelboard.types import RatingsDict
13from .base import EloCalculator
class MLEEloCalculator(EloCalculator):
    """Maximum Likelihood Estimation Elo calculator using logistic regression.

    Every decisive battle becomes one row of a design matrix holding
    ``+log(base)`` in the column of ``player_a`` and ``-log(base)`` in the
    column of ``player_b``. A logistic regression is fitted on those rows and
    its coefficients are mapped to ratings as
    ``scale * coefficient + initial_rating``.
    """

    def __init__(
        self,
        scale: float = 400,
        base: float = 10,
        initial_rating: float = 1000,
        *,
        fit_intercept: bool = False,
        max_iter: int = 1000,
        random_state: int | None = None,
    ) -> None:
        """Initialize the MLE Elo calculator.

        Args:
            scale: Scale parameter for Elo calculation
            base: Base for exponential calculation
            initial_rating: Initial rating for new players
            fit_intercept: Whether to fit intercept in logistic regression
            max_iter: Maximum iterations for logistic regression
            random_state: Seed forwarded to the logistic regression and used
                for the bootstrap resampling in ``calculate_with_bootstrap``.
                ``None`` leaves the resampling unseeded.
        """
        # We don't use k_factor for MLE, but keep it for consistency
        super().__init__(k_factor=1, scale=scale, base=base, initial_rating=initial_rating)
        self.fit_intercept = fit_intercept
        self.max_iter = max_iter
        self.random_state = random_state

    @staticmethod
    def _prepare_battles(battles: list[Battle] | pd.DataFrame) -> pd.DataFrame:
        """Return the decisive (non-tie) battles as a DataFrame.

        The result has the columns ``player_a``, ``player_b`` and ``winner``
        when built from ``Battle`` objects; a DataFrame input keeps its own
        columns. Ties are dropped because the model only handles binary
        outcomes.
        """
        if isinstance(battles, pd.DataFrame):
            # Boolean-mask indexing already yields a new frame, so the caller's
            # DataFrame is never modified.
            is_tie = battles["winner"].str.contains("tie", case=False, na=False)
            return battles[~is_tie]

        tie_outcomes = (BattleOutcome.TIE, BattleOutcome.TIE_BOTHBAD)
        return pd.DataFrame(
            [
                {
                    "player_a": battle.player_a,
                    "player_b": battle.player_b,
                    "winner": battle.outcome.value,
                }
                for battle in battles
                if battle.outcome not in tie_outcomes
            ]
        )

    @staticmethod
    def _count_battles(df: pd.DataFrame) -> dict:
        """Return how many rows of ``df`` each player takes part in."""
        as_a = df["player_a"].value_counts()
        # A row pairing a player with itself must count once, not twice, so the
        # b-side only contributes rows where the two players differ.
        as_b = df.loc[df["player_a"] != df["player_b"], "player_b"].value_counts()
        totals = as_a.add(as_b, fill_value=0)
        return {player: int(count) for player, count in totals.items()}

    def calculate(self, battles: list[Battle] | pd.DataFrame) -> RatingsDict:
        """Calculate Elo ratings using Maximum Likelihood Estimation.

        Args:
            battles: List of Battle objects or DataFrame with battles

        Returns:
            Dictionary mapping player names to EloRating objects. Empty when
            no decisive battle is left after removing ties.
        """
        df = self._prepare_battles(battles)
        if df.empty:
            return {}

        # Get unique players and map each one to a column of the design matrix
        players = pd.concat([df["player_a"], df["player_b"]]).unique()
        n_players = len(players)
        n_battles = df.shape[0]
        player_to_idx = pd.Series(np.arange(n_players), index=players)

        # Design matrix X: +log(base) for player_a, -log(base) for player_b
        rows = np.arange(n_battles)
        log_base = math.log(self.base)
        x = np.zeros((n_battles, n_players))
        x[rows, player_to_idx[df["player_a"]].to_numpy()] = log_base
        x[rows, player_to_idx[df["player_b"]].to_numpy()] = -log_base

        # Outcome vector Y: 1.0 where player_a won, 0.0 for every other row
        y = (df["winner"] == "player_a").to_numpy(dtype=float)

        # Fit logistic regression
        lr = LogisticRegression(
            fit_intercept=self.fit_intercept,
            max_iter=self.max_iter,
            random_state=self.random_state,
        )
        lr.fit(x, y)

        # Convert coefficients to Elo scores
        elo_scores = self.scale * lr.coef_[0] + self.initial_rating

        battle_counts = self._count_battles(df)
        return {
            player: EloRating(
                player=player,
                # Builtin float/int instead of numpy scalars
                rating=float(score),
                battles=battle_counts.get(player, 0),
            )
            for player, score in zip(players, elo_scores)
        }

    def calculate_with_bootstrap(
        self,
        battles: list[Battle] | pd.DataFrame,
        n_bootstrap: int = 500,
        confidence_level: float = 0.95,
    ) -> RatingsDict:
        """Calculate MLE Elo ratings with bootstrap confidence intervals.

        The decisive battles are resampled with replacement ``n_bootstrap``
        times and ratings are fitted on each resample. The reported rating is
        the per-player median over the resamples; the interval bounds are the
        matching lower and upper quantiles. Resampling is seeded from
        ``self.random_state``, so an integer seed gives repeatable intervals.

        Args:
            battles: List of Battle objects or DataFrame with battles
            n_bootstrap: Number of bootstrap samples
            confidence_level: Confidence level for intervals

        Returns:
            Dictionary of EloRating objects with confidence intervals. Empty
            when no decisive battle is left after removing ties. When every
            bootstrap sample fails, the result of ``calculate`` (without
            confidence intervals) is returned instead.
        """
        df = self._prepare_battles(battles)
        if df.empty:
            return {}

        # One generator shared by all draws: resamples differ from each other
        # but the whole sequence is repeatable for a fixed random_state.
        # RandomState(None) is seeded from OS entropy, i.e. stays random.
        rng = np.random.RandomState(self.random_state)

        bootstrap_results = []
        for _ in tqdm(range(n_bootstrap), desc="MLE Bootstrap"):
            # Bootstrap sample
            sampled_df = df.sample(n=len(df), replace=True, random_state=rng).reset_index(drop=True)

            try:
                # Calculate ratings for this sample
                sample_ratings = self.calculate(sampled_df)
            except Exception:
                # Deliberate best effort: a resample can be unfittable (e.g. it
                # contains a single outcome class), so skip it and keep going.
                continue

            bootstrap_results.append({player: rating.rating for player, rating in sample_ratings.items()})

        if not bootstrap_results:
            # Fallback to regular calculation if bootstrap fails
            return self.calculate(df)

        # One row per successful resample, one column per player. A player
        # absent from a resample is NaN there and is ignored by the statistics.
        bootstrap_df = pd.DataFrame(bootstrap_results)
        alpha = 1 - confidence_level
        medians = bootstrap_df.median()
        lower_bounds = bootstrap_df.quantile(alpha / 2)
        upper_bounds = bootstrap_df.quantile(1 - alpha / 2)

        # Battle counts refer to the original data, not to the resamples
        battle_counts = self._count_battles(df)

        return {
            player: EloRating(
                player=player,
                rating=float(medians[player]),
                confidence_interval=(float(lower_bounds[player]), float(upper_bounds[player])),
                battles=battle_counts.get(player, 0),
            )
            for player in bootstrap_df.columns
        }