Coverage for /Users/antonigmitruk/golf/src/golf/core/telemetry.py: 0%
201 statements
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-16 18:46 +0200
« prev ^ index » next coverage.py v7.6.12, created at 2025-08-16 18:46 +0200
1"""Telemetry module for anonymous usage tracking with PostHog."""
3import hashlib
4import json
5import os
6import platform
7import uuid
8from pathlib import Path
9from typing import Any
11import posthog
12from rich.console import Console
14from golf import __version__
# Rich console instance (not referenced by the functions visible in this part of the module).
console = Console()

# PostHog configuration
# This is a client-side API key, safe to be public
# Users can override with GOLF_POSTHOG_API_KEY environment variable
DEFAULT_POSTHOG_API_KEY = "phc_7ccsDDxoC5tK5hodlrs2moGC74cThRzcN63flRYPWGl"
# Resolved once at import time: the environment override wins over the default key.
POSTHOG_API_KEY = os.environ.get("GOLF_POSTHOG_API_KEY", DEFAULT_POSTHOG_API_KEY)
POSTHOG_HOST = "https://us.i.posthog.com"

# Telemetry state (process-wide caches; None means "not resolved yet")
_telemetry_enabled: bool | None = None  # Cached result of is_telemetry_enabled()
_anonymous_id: str | None = None  # Cached result of get_anonymous_id()
_user_identified: bool = False  # Track if we've already identified the user
31def _is_test_mode() -> bool:
32 """Check if we're in test mode."""
33 return os.environ.get("GOLF_TEST_MODE", "").lower() in ("1", "true", "yes", "on")
def _ensure_posthog_disabled_in_test_mode() -> None:
    """Force the PostHog client off whenever test mode is active.

    Outside test mode this is a no-op; the client's ``disabled`` flag is only
    written when it is not already set.
    """
    if not _is_test_mode():
        return
    if not posthog.disabled:
        posthog.disabled = True
def get_telemetry_config_path() -> Path:
    """Return the location of the telemetry settings file (``~/.golf/telemetry.json``)."""
    golf_dir = Path.home() / ".golf"
    return golf_dir / "telemetry.json"
def save_telemetry_preference(enabled: bool) -> None:
    """Save telemetry preference to persistent storage.

    The preference is written as JSON (``{"enabled": ..., "version": 1}``) to
    the path returned by ``get_telemetry_config_path()``. Saving is strictly
    best-effort: any failure while resolving the path, creating the parent
    directory or writing the file is swallowed so that telemetry bookkeeping
    can never break the calling command.

    Args:
        enabled: Whether telemetry should be enabled.
    """
    config = {"enabled": enabled, "version": 1}

    try:
        # Path resolution and directory creation live inside the try block:
        # an unwritable or undeterminable home directory must not raise here.
        config_path = get_telemetry_config_path()
        config_path.parent.mkdir(parents=True, exist_ok=True)
        with open(config_path, "w", encoding="utf-8") as f:
            json.dump(config, f)
    except Exception:
        # Don't fail if we can't save the preference
        pass
def load_telemetry_preference() -> bool | None:
    """Load telemetry preference from persistent storage.

    Returns:
        The stored ``"enabled"`` flag when the config file exists, parses as a
        JSON object and holds a real boolean; ``None`` in every other case
        (missing/unreadable/malformed file, or a non-boolean value).
    """
    try:
        with open(get_telemetry_config_path(), encoding="utf-8") as f:
            config = json.load(f)
    except Exception:
        # Missing, unreadable or malformed file: treat as "no preference".
        return None

    if not isinstance(config, dict):
        return None

    # Only honour genuine booleans. A hand-edited value such as "false" is a
    # truthy string and would otherwise silently switch telemetry on.
    enabled = config.get("enabled")
    return enabled if isinstance(enabled, bool) else None
def is_telemetry_enabled() -> bool:
    """Decide whether telemetry is switched on, caching the answer.

    Resolution order:
    1. Previously cached decision
    2. GOLF_TEST_MODE environment variable (test mode always disables)
    3. GOLF_TELEMETRY environment variable
    4. Preference file on disk
    5. Disabled (telemetry is opt-in)
    """
    global _telemetry_enabled

    if _telemetry_enabled is not None:
        return _telemetry_enabled

    if _is_test_mode():
        decision = False
    else:
        env_value = os.environ.get("GOLF_TELEMETRY", "").lower()
        if env_value in ("1", "true", "yes", "on"):
            decision = True
        elif env_value in ("0", "false", "no", "off"):
            decision = False
        else:
            # No explicit environment override: fall back to the stored
            # preference, and to "disabled" when nothing was stored.
            stored = load_telemetry_preference()
            decision = stored if stored is not None else False

    _telemetry_enabled = decision
    return decision
def set_telemetry_enabled(enabled: bool, persist: bool = True) -> None:
    """Override the telemetry decision for this process.

    Args:
        enabled: New telemetry state.
        persist: When true, also write the state to the preference file.
    """
    global _telemetry_enabled

    # Update the in-process cache first; persisting happens afterwards.
    _telemetry_enabled = enabled
    if not persist:
        return
    save_telemetry_preference(enabled)
def get_anonymous_id() -> str:
    """Return the anonymous installation ID, creating and storing it if needed.

    The ID is cached in memory and kept in ``~/.golf/telemetry_id``. IDs have
    the shape ``golf-<8 hex hash>-<8 hex random>``; a stored 21-character
    ``golf-...`` value (the older format without the second hyphen) is
    discarded and replaced.
    """
    global _anonymous_id

    if _anonymous_id:
        return _anonymous_id

    id_file = Path.home() / ".golf" / "telemetry_id"

    if id_file.exists():
        try:
            stored = id_file.read_text().strip()
        except Exception:
            # Unreadable file: behave as if nothing was stored.
            stored = ""

        # Old format: golf-[8 chars hash][8 chars random]  (21 characters)
        # New format: golf-[8 chars hash]-[8 chars random]
        is_legacy = stored.startswith("golf-") and len(stored) == 21
        if stored and not is_legacy:
            _anonymous_id = stored
            return stored

    # Hash a few coarse platform facts, then append randomness so that two
    # machines with the same platform still get different IDs.
    fingerprint = "-".join((platform.machine(), platform.system(), platform.python_version()))
    machine_hash = hashlib.sha256(fingerprint.encode()).hexdigest()[:8]
    random_component = uuid.uuid4().hex[:8]

    new_id = f"golf-{machine_hash}-{random_component}"
    _anonymous_id = new_id

    try:
        id_file.parent.mkdir(parents=True, exist_ok=True)
        id_file.write_text(new_id)
    except Exception:
        # Persisting is optional; the in-memory ID still works for this run.
        pass

    return new_id
def initialize_telemetry() -> None:
    """Configure the PostHog client, provided telemetry may run.

    Nothing is configured when PostHog is disabled (which test mode forces),
    when telemetry is not enabled, or when the API key is empty or still a
    ``phc_YOUR...`` placeholder. Configuration errors are swallowed.
    """
    _ensure_posthog_disabled_in_test_mode()

    # A disabled client (e.g. test mode) or an opted-out user: leave untouched.
    if posthog.disabled or not is_telemetry_enabled():
        return

    key_is_usable = bool(POSTHOG_API_KEY) and not POSTHOG_API_KEY.startswith("phc_YOUR")
    if not key_is_usable:
        return

    try:
        posthog.project_api_key = POSTHOG_API_KEY
        posthog.host = POSTHOG_HOST

        # Make sure the client is switched on, with its debug output off.
        posthog.disabled = False
        posthog.debug = False

        # Attach IP/GeoIP suppression properties to every event.
        # NOTE(review): confirm the installed posthog SDK exposes
        # set_global_event_properties; if it does not, the AttributeError is
        # swallowed below and no global properties are registered.
        posthog.set_global_event_properties(
            {
                "$ip": "0",  # Override IP with dummy value to prevent collection
                "$geoip_disable": True,  # Disable all GeoIP enrichment
            }
        )
    except Exception:
        # Telemetry must never take the application down.
        pass
def track_event(event_name: str, properties: dict[str, Any] | None = None) -> None:
    """Send one anonymous event to PostHog.

    Every payload carries ``"$ip": "0"`` and ``"$geoip_disable": True``. Only
    an allow-listed subset of ``properties`` is forwarded; all other keys are
    dropped. The first event of a process also issues an ``identify`` call
    with version/OS person properties. All errors are swallowed.

    Args:
        event_name: Name of the event (e.g., "cli_init", "cli_build")
        properties: Optional properties; only allow-listed keys are sent
    """
    global _user_identified

    _ensure_posthog_disabled_in_test_mode()

    # Disabled client (test mode) or telemetry not enabled: send nothing.
    if posthog.disabled or not is_telemetry_enabled():
        return

    # Empty or placeholder API key: send nothing.
    if not POSTHOG_API_KEY or POSTHOG_API_KEY.startswith("phc_YOUR"):
        return

    try:
        # Lazily configure the client the first time an event is sent.
        if posthog.project_api_key != POSTHOG_API_KEY:
            initialize_telemetry()

        distinct_id = get_anonymous_id()

        major, minor = platform.python_version_tuple()[:2]
        python_version = f"{major}.{minor}"
        os_name = platform.system()

        if not _user_identified:
            # One identify call per process, carrying only coarse,
            # non-identifying person properties.
            posthog.identify(
                distinct_id=distinct_id,
                properties={
                    "$set": {
                        "golf_version": __version__,
                        "os": os_name,
                        "python_version": python_version,
                    },
                    "$ip": "0",
                    "$geoip_disable": True,
                },
            )
            _user_identified = True

        payload = {
            "golf_version": __version__,
            "python_version": python_version,
            "os": os_name,
            "$ip": "0",  # Override IP to prevent collection
            "$geoip_disable": True,  # Disable GeoIP enrichment
        }

        if properties:
            # Allow-list: anything not named here never leaves the process.
            allowed_keys = (
                "success",
                "environment",
                "template",
                "command_type",
                "error_type",
                "error_message",
                "shutdown_type",
                "exit_code",
            )
            payload.update({key: properties[key] for key in allowed_keys if key in properties})

        posthog.capture(
            distinct_id=distinct_id,
            event=event_name,
            properties=payload,
        )
    except Exception:
        # Telemetry must never take the application down.
        pass
def track_command(
    command: str,
    success: bool = True,
    error_type: str | None = None,
    error_message: str | None = None,
) -> None:
    """Record a CLI command run as a ``cli_<command>`` event.

    Args:
        command: The command being executed (e.g., "init", "build", "run")
        success: Whether the command was successful
        error_type: Exception class name for a failed command
            (e.g., "ValueError", "FileNotFoundError")
        error_message: Error text for a failed command; it is passed through
            ``_sanitize_error_message`` before being attached
    """
    properties = {"success": success}

    # Error details are attached for failed commands only.
    if not success:
        if error_type:
            properties["error_type"] = error_type
        if error_message:
            properties["error_message"] = _sanitize_error_message(error_message)

    track_event(f"cli_{command}", properties)
def track_detailed_error(
    event_name: str,
    error: Exception,
    context: str | None = None,
    operation: str | None = None,
    additional_props: dict[str, Any] | None = None,
) -> None:
    """Track a detailed error with enhanced debugging information.

    Builds a property dict (error type, sanitized message, timestamp,
    sanitized tail of the traceback, failing line number, coarse platform
    info, allow-listed extras) and hands it to ``track_event``.

    NOTE(review): ``track_event`` forwards only its own allow-listed keys, so
    of the properties built here only ``success``, ``error_type``,
    ``error_message``, ``exit_code``, ``shutdown_type``, ``environment`` and
    ``template`` are actually sent. The rest (``stack_trace``, ``error_line``,
    ``operation``, ``context``, ...) are currently dropped. Confirm whether
    that is intentional before widening either allow-list.

    Args:
        event_name: Name of the error event (e.g., "cli_run_failed", "cli_build_failed")
        error: The exception that occurred
        context: Additional context about where the error occurred
        operation: The specific operation that failed
        additional_props: Additional properties; only allow-listed keys are kept
    """
    import time
    import traceback

    properties = {
        "success": False,
        "error_type": type(error).__name__,
        "error_message": _sanitize_error_message(str(error)),
        "timestamp": int(time.time()),
    }

    # Add operation context
    if operation:
        properties["operation"] = operation
    if context:
        properties["context"] = context

    # Add sanitized stack trace for debugging
    try:
        tb_lines = traceback.format_exception(type(error), error, error.__traceback__)
        # Keep the last three entries (most relevant); slicing already copes
        # with shorter lists.
        sanitized_trace = [_sanitize_error_message(line.strip()) for line in tb_lines[-3:]]
        properties["stack_trace"] = " | ".join(sanitized_trace)

        # Line number of the innermost frame, i.e. where the error was raised.
        tb = error.__traceback__
        if tb is not None:
            while tb.tb_next:
                tb = tb.tb_next
            properties["error_line"] = tb.tb_lineno
    except Exception:
        # Don't fail if we can't capture stack trace
        pass

    # Add system context for debugging
    try:
        # Despite the key name, this holds the interpreter implementation
        # name (e.g. "CPython"), not an executable path.
        properties["python_executable"] = _sanitize_error_message(platform.python_implementation())
        properties["platform_detail"] = platform.platform()[:50]  # Limit length
    except Exception:
        pass

    # Merge additional properties, keeping allow-listed keys only
    if additional_props:
        safe_additional_keys = {
            "exit_code",
            "shutdown_type",
            "environment",
            "template",
            "build_env",
            "transport",
            "component_count",
            "file_path",
            "component_type",
            "validation_error",
            "config_error",
        }
        properties.update(
            {key: value for key, value in additional_props.items() if key in safe_additional_keys}
        )

    track_event(event_name, properties)
436def _sanitize_error_message(message: str) -> str:
437 """Sanitize error messages to remove sensitive information."""
438 import re
440 # Remove file paths but preserve filenames
441 # Match paths with directories and capture the filename
442 # Unix style: /path/to/file.py -> file.py
443 message = re.sub(r"(/[^/\s]+)+/([^/\s]+)", r"\2", message)
444 # Windows style: C:\path\to\file.py -> file.py
445 message = re.sub(r"([A-Za-z]:\\[^\\]+\\)+([^\\]+)", r"\2", message)
446 # Remaining absolute paths without filename
447 message = re.sub(r"[/\\][^\s]*[/\\]", "[PATH]/", message)
449 # Remove potential API keys or tokens (common patterns)
450 # Generic API keys (20+ alphanumeric with underscores/hyphens)
451 message = re.sub(r"\b[a-zA-Z0-9_-]{32,}\b", "[REDACTED]", message)
452 # Bearer tokens
453 message = re.sub(r"Bearer\s+[a-zA-Z0-9_.-]+", "Bearer [REDACTED]", message)
455 # Remove email addresses
456 message = re.sub(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Z|a-z]{2,}\b", "[EMAIL]", message)
458 # Remove IP addresses
459 message = re.sub(r"\b(?:[0-9]{1,3}\.){3}[0-9]{1,3}\b", "[IP]", message)
461 # Remove port numbers in URLs
462 message = re.sub(r":[0-9]{2,5}(?=/|$|\s)", ":[PORT]", message)
464 # Truncate to reasonable length
465 if len(message) > 200:
466 message = message[:197] + "..."
468 return message
def flush() -> None:
    """Push queued telemetry events to PostHog; does nothing when telemetry is off."""
    if is_telemetry_enabled():
        try:
            posthog.flush()
        except Exception:
            # A failed flush is not worth surfacing to the user.
            pass
def shutdown() -> None:
    """Shut the PostHog client down; does nothing when telemetry is off."""
    if is_telemetry_enabled():
        try:
            posthog.shutdown()
        except Exception:
            # A failed shutdown is not worth surfacing to the user.
            pass