Coverage for /Users/antonigmitruk/golf/src/golf/core/telemetry.py: 0%

201 statements  

« prev     ^ index     » next       coverage.py v7.6.12, created at 2025-08-16 18:46 +0200

1"""Telemetry module for anonymous usage tracking with PostHog.""" 

2 

3import hashlib 

4import json 

5import os 

6import platform 

7import uuid 

8from pathlib import Path 

9from typing import Any 

10 

11import posthog 

12from rich.console import Console 

13 

14from golf import __version__ 

15 

16console = Console() 

17 

18# PostHog configuration 

19# This is a client-side API key, safe to be public 

20# Users can override with GOLF_POSTHOG_API_KEY environment variable 

21DEFAULT_POSTHOG_API_KEY = "phc_7ccsDDxoC5tK5hodlrs2moGC74cThRzcN63flRYPWGl" 

22POSTHOG_API_KEY = os.environ.get("GOLF_POSTHOG_API_KEY", DEFAULT_POSTHOG_API_KEY) 

23POSTHOG_HOST = "https://us.i.posthog.com" 

24 

25# Telemetry state 

26_telemetry_enabled: bool | None = None 

27_anonymous_id: str | None = None 

28_user_identified: bool = False # Track if we've already identified the user 

29 

30 

def _is_test_mode() -> bool:
    """Report whether the GOLF_TEST_MODE environment variable is switched on.

    The value is compared case-insensitively against "1", "true", "yes"
    and "on"; an unset variable or any other value counts as off.
    """
    flag = os.environ.get("GOLF_TEST_MODE", "")
    return flag.lower() in ("1", "true", "yes", "on")

34 

35 

def _ensure_posthog_disabled_in_test_mode() -> None:
    """Switch the PostHog client off whenever test mode is active.

    Outside test mode this is a no-op; inside test mode ``posthog.disabled``
    is set to True unless it already is.
    """
    if not _is_test_mode():
        return
    if not posthog.disabled:
        posthog.disabled = True

40 

41 

def get_telemetry_config_path() -> Path:
    """Return the location of the telemetry preference file.

    The file lives at ``.golf/telemetry.json`` inside the user's home
    directory. The path is only computed here, not created.
    """
    return Path.home().joinpath(".golf", "telemetry.json")

45 

46 

def save_telemetry_preference(enabled: bool) -> None:
    """Save telemetry preference to persistent storage.

    Writes a JSON object with the keys ``"enabled"`` and ``"version"`` (1) to
    the path returned by ``get_telemetry_config_path()``, creating the parent
    directory when needed.

    Saving is best-effort: any failure while locating the file, creating the
    directory or writing the JSON is ignored, so an unwritable home directory
    never breaks the calling command.

    Args:
        enabled: Whether telemetry should be enabled.
    """
    config = {"enabled": enabled, "version": 1}

    try:
        config_path = get_telemetry_config_path()
        # Directory creation is inside the try block too: previously a
        # PermissionError raised by mkdir escaped to the caller.
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f)
    except Exception:
        # Don't fail if we can't save the preference
        pass

60 

61 

def load_telemetry_preference() -> bool | None:
    """Load telemetry preference from persistent storage.

    Reads the JSON file at ``get_telemetry_config_path()`` and returns the
    value stored under ``"enabled"``.

    Returns:
        True or False when the file holds a boolean ``"enabled"`` value.
        None when the file is missing, unreadable, not valid JSON, not a JSON
        object, or when ``"enabled"`` is absent or not a boolean.
    """
    config_path = get_telemetry_config_path()

    try:
        # A missing file raises FileNotFoundError, which is handled below, so
        # no separate exists() check is needed.
        with open(config_path, encoding="utf-8") as f:
            config = json.load(f)
    except Exception:
        return None

    if not isinstance(config, dict):
        return None

    enabled = config.get("enabled")
    # Only accept real booleans: a hand-edited value such as the string
    # "false" is truthy and would otherwise switch telemetry on.
    return enabled if isinstance(enabled, bool) else None

75 

76 

def is_telemetry_enabled() -> bool:
    """Decide whether telemetry may be sent, caching the decision.

    The first source that gives an answer wins:
        1. The cached value from an earlier call or set_telemetry_enabled()
        2. GOLF_TEST_MODE (test mode always disables telemetry)
        3. GOLF_TELEMETRY ("1"/"true"/"yes"/"on" or "0"/"false"/"no"/"off")
        4. The persisted preference file
        5. Disabled, because telemetry is opt-in

    Returns:
        The resolved telemetry state.
    """
    global _telemetry_enabled

    if _telemetry_enabled is not None:
        return _telemetry_enabled

    if _is_test_mode():
        decision = False
    else:
        env_value = os.environ.get("GOLF_TELEMETRY", "").lower()
        if env_value in ("1", "true", "yes", "on"):
            decision = True
        elif env_value in ("0", "false", "no", "off"):
            decision = False
        else:
            # No usable environment override: fall back to the saved
            # preference, and to "disabled" when nothing was saved.
            saved = load_telemetry_preference()
            decision = saved if saved is not None else False

    _telemetry_enabled = decision
    return decision

115 

116 

def set_telemetry_enabled(enabled: bool, persist: bool = True) -> None:
    """Override the cached telemetry state and optionally store it on disk.

    Args:
        enabled: New telemetry state for this process.
        persist: When True, also hand the value to
            save_telemetry_preference() so that it is written to disk.
    """
    global _telemetry_enabled
    _telemetry_enabled = enabled

    if not persist:
        return
    save_telemetry_preference(enabled)

129 

130 

def get_anonymous_id() -> str:
    """Return the anonymous installation ID, creating one when necessary.

    The ID is cached in memory and stored in ``.golf/telemetry_id`` inside
    the user's home directory. A stored ID in the legacy layout
    ``golf-[8 hash chars][8 random chars]`` (21 characters, no hyphen between
    the two parts) is discarded and replaced by a fresh ID in the current
    layout ``golf-[8 hash chars]-[8 random chars]``.

    Returns:
        The anonymous ID string.
    """
    global _anonymous_id

    if _anonymous_id:
        return _anonymous_id

    id_file = Path.home().joinpath(".golf", "telemetry_id")

    if id_file.exists():
        try:
            stored = id_file.read_text().strip()
        except Exception:
            # Unreadable file: behave as if nothing was stored.
            stored = ""
        is_legacy = stored.startswith("golf-") and len(stored) == 21
        if stored and not is_legacy:
            _anonymous_id = stored
            return stored

    # Hash of coarse, non-identifying platform facts ...
    fingerprint = "-".join(
        (platform.machine(), platform.system(), platform.python_version())
    )
    machine_hash = hashlib.sha256(fingerprint.encode()).hexdigest()[:8]
    # ... plus the first 8 hex digits of a random UUID for uniqueness.
    random_component = uuid.uuid4().hex[:8]

    # The hyphen between the parts keeps new IDs distinct from legacy ones.
    _anonymous_id = f"golf-{machine_hash}-{random_component}"

    try:
        id_file.parent.mkdir(parents=True, exist_ok=True)
        id_file.write_text(_anonymous_id)
    except Exception:
        # Persisting is optional; the in-memory ID is still returned.
        pass

    return _anonymous_id

181 

182 

def initialize_telemetry() -> None:
    """Configure the PostHog client when telemetry is allowed.

    Returns without configuring anything when test mode is active, when the
    PostHog client is disabled, when telemetry is not enabled, or when the
    API key is empty or still the "phc_YOUR..." placeholder. Errors raised
    while configuring the client are swallowed.
    """
    # Test mode forces the client off before anything else is looked at.
    _ensure_posthog_disabled_in_test_mode()

    key_unusable = not POSTHOG_API_KEY or POSTHOG_API_KEY.startswith("phc_YOUR")
    if posthog.disabled or not is_telemetry_enabled() or key_unusable:
        return

    try:
        posthog.project_api_key = POSTHOG_API_KEY
        posthog.host = POSTHOG_HOST

        # Keep the client switched on and its debug output switched off.
        posthog.disabled = False
        posthog.debug = False

        # Attach a dummy IP and the GeoIP opt-out to every event.
        # NOTE(review): confirm that the installed posthog SDK exposes
        # set_global_event_properties; if it does not, the resulting
        # AttributeError is silently swallowed by the handler below.
        privacy_properties = {"$ip": "0", "$geoip_disable": True}
        posthog.set_global_event_properties(privacy_properties)
    except Exception:
        # Telemetry must never break the application.
        pass

218 

219 

def track_event(event_name: str, properties: dict[str, Any] | None = None) -> None:
    """Send one anonymous event to PostHog.

    Nothing is sent in test mode, when the PostHog client is disabled, when
    telemetry is not enabled, or when the API key is empty or a placeholder.
    Every event carries ``"$ip": "0"`` and ``"$geoip_disable": True`` in
    addition to the Golf version, the major.minor Python version and the OS
    name. Any exception raised while sending is swallowed.

    Args:
        event_name: Name of the event (e.g., "cli_init", "cli_build")
        properties: Optional extra properties. Only allow-listed keys are
            forwarded; every other key is dropped silently.
    """
    global _user_identified

    _ensure_posthog_disabled_in_test_mode()

    if posthog.disabled:
        return
    if not is_telemetry_enabled():
        return
    if not POSTHOG_API_KEY or POSTHOG_API_KEY.startswith("phc_YOUR"):
        return

    try:
        # Configure the client lazily on first use.
        if posthog.project_api_key != POSTHOG_API_KEY:
            initialize_telemetry()

        distinct_id = get_anonymous_id()

        major, minor = platform.python_version_tuple()[:2]
        python_version = f"{major}.{minor}"

        # identify() is sent once per session.
        if not _user_identified:
            posthog.identify(
                distinct_id=distinct_id,
                properties={
                    "$set": {
                        "golf_version": __version__,
                        "os": platform.system(),
                        "python_version": python_version,
                    },
                    "$ip": "0",
                    "$geoip_disable": True,
                },
            )
            _user_identified = True

        event_properties = {
            "golf_version": __version__,
            "python_version": python_version,
            "os": platform.system(),
            "$ip": "0",
            "$geoip_disable": True,
        }

        if properties:
            # NOTE(review): keys such as "stack_trace", "operation" or
            # "context" built by track_detailed_error() are not in this
            # allow-list and therefore never leave the process.
            allowed_keys = {
                "success",
                "environment",
                "template",
                "command_type",
                "error_type",
                "error_message",
                "shutdown_type",
                "exit_code",
            }
            event_properties.update(
                {key: value for key, value in properties.items() if key in allowed_keys}
            )

        posthog.capture(
            distinct_id=distinct_id,
            event=event_name,
            properties=event_properties,
        )
    except Exception:
        # Telemetry must never break the application.
        pass

316 

317 

def track_command(
    command: str,
    success: bool = True,
    error_type: str | None = None,
    error_message: str | None = None,
) -> None:
    """Record the outcome of a CLI command as a ``cli_<command>`` event.

    Args:
        command: The command being executed (e.g., "init", "build", "run")
        success: Whether the command was successful
        error_type: Exception class name for a failed command (e.g.,
            "ValueError"); ignored when the command succeeded
        error_message: Error text for a failed command; it is passed through
            _sanitize_error_message() before being attached
    """
    payload: dict[str, Any] = {"success": success}

    # Error details are only attached to failed commands.
    if not success:
        if error_type:
            payload["error_type"] = error_type
        if error_message:
            payload["error_message"] = _sanitize_error_message(error_message)

    track_event(f"cli_{command}", payload)

345 

346 

def track_detailed_error(
    event_name: str,
    error: Exception,
    context: str | None = None,
    operation: str | None = None,
    additional_props: dict[str, Any] | None = None,
) -> None:
    """Track a detailed error with enhanced debugging information.

    Builds a property dict from the exception (type name, sanitized message,
    Unix timestamp, sanitized tail of the traceback, line number of the
    innermost frame), adds coarse platform information and the allow-listed
    entries of ``additional_props``, and hands everything to track_event().

    NOTE(review): track_event() forwards only its own allow-list, so of the
    keys built here only "success", "error_type", "error_message",
    "exit_code", "shutdown_type", "environment" and "template" are actually
    sent. Decide whether that allow-list should be widened.

    Args:
        event_name: Name of the error event (e.g., "cli_run_failed", "cli_build_failed")
        error: The exception that occurred
        context: Additional context about where the error occurred
        operation: The specific operation that failed
        additional_props: Additional properties to include
    """
    import time
    import traceback

    properties: dict[str, Any] = {
        "success": False,
        "error_type": type(error).__name__,
        "error_message": _sanitize_error_message(str(error)),
        "timestamp": int(time.time()),
    }

    if operation:
        properties["operation"] = operation
    if context:
        properties["context"] = context

    # Sanitized stack trace: only the last three formatted entries, which are
    # the ones closest to the failure.
    try:
        tb_lines = traceback.format_exception(type(error), error, error.__traceback__)
        properties["stack_trace"] = " | ".join(
            _sanitize_error_message(entry.strip()) for entry in tb_lines[-3:]
        )

        # Line number of the innermost frame, i.e. where the error was raised.
        tb = error.__traceback__
        if tb is not None:
            while tb.tb_next:
                tb = tb.tb_next
            properties["error_line"] = tb.tb_lineno
    except Exception:
        # Don't fail if we can't capture the stack trace
        pass

    # Coarse system context for debugging
    try:
        properties["python_executable"] = _sanitize_error_message(platform.python_implementation())
        properties["platform_detail"] = platform.platform()[:50]  # Limit length
    except Exception:
        pass

    if additional_props:
        # Only allow-listed additional properties are copied over.
        safe_additional_keys = {
            "exit_code",
            "shutdown_type",
            "environment",
            "template",
            "build_env",
            "transport",
            "component_count",
            "file_path",
            "component_type",
            "validation_error",
            "config_error",
        }
        properties.update(
            {key: value for key, value in additional_props.items() if key in safe_additional_keys}
        )

    track_event(event_name, properties)

434 

435 

def _sanitize_error_message(message: str) -> str:
    """Sanitize error messages to remove sensitive information.

    The substitutions run in this order:
        1. Unix and Windows paths are reduced to their final component
        2. Remaining slash-delimited fragments become "[PATH]/"
        3. Runs of 32 or more token characters become "[REDACTED]"
        4. Bearer tokens become "Bearer [REDACTED]"
        5. Email addresses become "[EMAIL]"
        6. IPv4 addresses become "[IP]"
        7. Port numbers become ":[PORT]"
    Finally the result is truncated to at most 200 characters.

    Args:
        message: The raw error text.

    Returns:
        The sanitized text, at most 200 characters long.
    """
    import re

    substitutions = (
        # Unix style: /path/to/file.py -> file.py
        (r"(/[^/\s]+)+/([^/\s]+)", r"\2"),
        # Windows style: C:\path\to\file.py -> file.py
        # The drive letter is matched once, followed by any number of
        # directory components. The earlier pattern repeated the drive letter
        # inside the group, consumed only the first directory and leaked the
        # following one (typically the user name).
        (r"[A-Za-z]:\\(?:[^\\\s]+\\)*([^\\\s]+)", r"\1"),
        # Remaining slash-delimited fragments without a file name
        (r"[/\\][^\s]*[/\\]", "[PATH]/"),
        # Generic API keys / tokens (32+ alphanumerics, underscores, hyphens)
        (r"\b[a-zA-Z0-9_-]{32,}\b", "[REDACTED]"),
        # Bearer tokens
        (r"Bearer\s+[a-zA-Z0-9_.-]+", "Bearer [REDACTED]"),
        # Email addresses (the TLD class no longer accepts a literal "|")
        (r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b", "[EMAIL]"),
        # IPv4 addresses
        (r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", "[IP]"),
        # Port numbers followed by "/", whitespace or end of text
        (r":[0-9]{2,5}(?=/|$|\s)", ":[PORT]"),
    )

    for pattern, replacement in substitutions:
        message = re.sub(pattern, replacement, message)

    # Truncate to a reasonable length
    if len(message) > 200:
        message = message[:197] + "..."

    return message

469 

470 

def flush() -> None:
    """Ask the PostHog client to send any queued events.

    Does nothing when telemetry is not enabled; errors raised by the client
    are ignored.
    """
    if is_telemetry_enabled():
        try:
            posthog.flush()
        except Exception:
            # A failed flush must not affect the caller.
            pass

481 

482 

def shutdown() -> None:
    """Shut the PostHog client down via ``posthog.shutdown()``.

    Does nothing when telemetry is not enabled; errors raised by the client
    are ignored.
    """
    if is_telemetry_enabled():
        try:
            posthog.shutdown()
        except Exception:
            # A failed shutdown must not affect the caller.
            pass