Coverage for src/usaspending/utils/retry.py: 65%
71 statements
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-03 17:15 -0700
« prev ^ index » next coverage.py v7.10.6, created at 2025-09-03 17:15 -0700
1"""Retry logic implementation for USASpending API client."""
3from __future__ import annotations
5import random
6import time
7from typing import Any, Callable, Optional
9import requests
11from ..config import config
12from ..exceptions import HTTPError, RateLimitError
13from ..logging_config import USASpendingLogger
15logger = USASpendingLogger.get_logger(__name__)
class RetryHandler:
    """
    Handles retry logic with exponential backoff for API requests.

    This implementation retries requests that fail due to transient errors
    like network issues, server errors (5xx), and rate limiting (429).
    """

    # HTTP status codes that should be retried
    RETRYABLE_STATUS_CODES = {
        429,  # Too Many Requests (rate limit)
        500,  # Internal Server Error
        502,  # Bad Gateway
        503,  # Service Unavailable
        504,  # Gateway Timeout
        520,  # Unknown Error (Cloudflare)
        521,  # Web Server Is Down
        522,  # Connection Timed Out
        523,  # Origin Is Unreachable
        524,  # A Timeout Occurred
    }

    # Exception types that should be retried
    RETRYABLE_EXCEPTIONS = (
        requests.exceptions.ConnectionError,
        requests.exceptions.Timeout,
        requests.exceptions.ConnectTimeout,
        requests.exceptions.ReadTimeout,
    )

    def __init__(self):
        """
        Initialize the retry handler.

        Takes no arguments: the settings are read from the module-level
        ``config`` object (``max_retries``, ``retry_delay`` and
        ``retry_backoff``) at construction time.
        """
        self.max_retries = config.max_retries
        self.base_delay = config.retry_delay
        self.backoff_factor = config.retry_backoff

        logger.debug(
            f"Initialized RetryHandler: max_retries={self.max_retries}, "
            f"base_delay={self.base_delay}s, backoff_factor={self.backoff_factor}"
        )

    def execute(self, func: Callable, *args, **kwargs) -> Any:
        """
        Execute a function with retry logic.

        Args:
            func: Function to execute (typically session.request)
            *args: Positional arguments to pass to the function
            **kwargs: Keyword arguments to pass to the function

        Returns:
            The result of the successful function call

        Raises:
            The last exception encountered if all retries are exhausted,
            or immediately the first exception that is not retryable
        """
        # Clamp so that a negative max_retries still yields one attempt;
        # otherwise the loop would never run and there would be nothing
        # to return or raise.
        last_attempt = max(self.max_retries, 0)

        for attempt in range(last_attempt + 1):  # +1 for the initial attempt
            try:
                result = func(*args, **kwargs)

                # Check if the result is a response object with a status code;
                # retryable statuses are converted into exceptions here so
                # that they flow through the same handler as network errors.
                if hasattr(result, "status_code"):
                    self._check_response_for_retry(result, attempt)

                return result

            except Exception as e:
                # Don't retry on the last attempt
                if attempt >= last_attempt:
                    logger.warning(
                        f"Max retries ({self.max_retries}) exhausted. Final error: {e}"
                    )
                    raise

                # Check if this exception should be retried
                if not self._should_retry_exception(e):
                    logger.debug(f"Exception {type(e).__name__} is not retryable")
                    raise

                # Calculate delay and wait before retrying
                delay = self._calculate_delay(attempt, e)
                if delay > 0:
                    logger.info(
                        f"Retry attempt {attempt + 1}/{self.max_retries} after {delay:.2f}s "
                        f"due to {type(e).__name__}: {e}"
                    )
                    time.sleep(delay)

        # Not reached: every iteration returns, raises, or continues, and the
        # final iteration (attempt == last_attempt) always returns or raises.

    def _check_response_for_retry(
        self, response: requests.Response, attempt: int
    ) -> None:
        """
        Check if a response should trigger a retry.

        Args:
            response: The HTTP response object
            attempt: Current attempt number (0-based); currently unused,
                kept for interface compatibility

        Raises:
            RateLimitError: If the response status is 429; carries the parsed
                Retry-After value (or None)
            HTTPError: If the response status is any other code listed in
                RETRYABLE_STATUS_CODES
        """
        status = response.status_code
        if status not in self.RETRYABLE_STATUS_CODES:
            return

        if status == 429:
            # Rate limit exceeded
            retry_after = self._get_retry_after_header(response)
            logger.warning(
                f"Rate limit hit (HTTP 429). Retry-After: {retry_after}s"
            )
            raise RateLimitError("Rate limit exceeded", retry_after=retry_after)

        # Every remaining retryable status is raised as an HTTPError so that
        # RETRYABLE_STATUS_CODES stays the single source of truth (all other
        # codes currently in the set are 5xx server errors).
        logger.warning(f"Server error (HTTP {status})")
        raise HTTPError(
            f"Server error: HTTP {status}",
            status_code=status,
        )

    def _should_retry_exception(self, exception: Exception) -> bool:
        """
        Determine if an exception should trigger a retry.

        Args:
            exception: The exception that occurred

        Returns:
            True if the exception should be retried, False otherwise
        """
        # Always retry these network-related exceptions
        if isinstance(exception, self.RETRYABLE_EXCEPTIONS):
            return True

        # Retry rate limit errors
        if isinstance(exception, RateLimitError):
            return True

        # Retry HTTP errors with retryable status codes
        if isinstance(exception, HTTPError):
            return exception.status_code in self.RETRYABLE_STATUS_CODES

        # Don't retry other exceptions (like validation errors, auth errors, etc.)
        return False

    def _calculate_delay(self, attempt: int, exception: Exception) -> float:
        """
        Calculate the delay before the next retry attempt.

        Args:
            attempt: Current attempt number (0-based)
            exception: The exception that triggered this retry

        Returns:
            Delay in seconds before the next attempt
        """
        # Handle rate limit errors specially: prefer the server-provided
        # retry-after value, but only when it is a positive wait. A missing,
        # zero or negative value falls through to exponential backoff so a
        # rate-limited request is never retried with no delay at all.
        if isinstance(exception, RateLimitError) and exception.retry_after:
            server_delay = float(exception.retry_after)
            if server_delay > 0:
                return server_delay

        # Calculate exponential backoff with jitter
        delay = self.base_delay * (self.backoff_factor**attempt)

        # Add jitter (randomness) to avoid thundering herd problem
        # Use up to 25% jitter
        jitter = delay * 0.25 * random.random()
        delay += jitter

        logger.debug(f"Calculated retry delay: {delay:.3f}s (attempt {attempt})")
        return delay

    def _get_retry_after_header(self, response: requests.Response) -> Optional[int]:
        """
        Extract the Retry-After header value from a rate limit response.

        Both forms of the header are understood: a number of seconds
        (``Retry-After: 120``) and an HTTP-date
        (``Retry-After: Wed, 21 Oct 2015 07:28:00 GMT``).

        Args:
            response: The HTTP response object

        Returns:
            Number of seconds to wait (never negative), or None if the header
            is not present or cannot be parsed
        """
        raw_value = response.headers.get("Retry-After")
        if not raw_value:
            return None
        return self._parse_retry_after(raw_value)

    @staticmethod
    def _parse_retry_after(value: str) -> Optional[int]:
        """
        Convert a raw Retry-After header value into whole seconds.

        Args:
            value: The header value, either delay-seconds or an HTTP-date

        Returns:
            Seconds to wait, clamped to 0 for negative numbers and dates in
            the past, or None if the value matches neither form
        """
        # Imported locally so this class does not depend on additional
        # module-level imports.
        import math
        from datetime import datetime, timezone
        from email.utils import parsedate_to_datetime

        try:
            seconds = int(value)
        except (TypeError, ValueError):
            # Not delay-seconds; try the HTTP-date form.
            try:
                target = parsedate_to_datetime(value)
            except (TypeError, ValueError, OverflowError):
                # Unparseable: the caller falls back to exponential backoff.
                return None
            if target.tzinfo is None:
                # A date parsed without zone information is treated as UTC.
                target = target.replace(tzinfo=timezone.utc)
            remaining = (target - datetime.now(timezone.utc)).total_seconds()
            # Round up so we never retry before the requested instant.
            seconds = math.ceil(remaining)

        return max(seconds, 0)