Coverage for src / mysingle / core / metrics / router.py: 0%
79 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-02 00:58 +0900
« prev ^ index » next coverage.py v7.12.0, created at 2025-12-02 00:58 +0900
1"""Enhanced metrics router with comprehensive endpoints.

Public endpoints (no auth required, configured in Kong):
- GET /metrics/ - Basic metrics (json/prometheus format)
- GET /metrics/json - JSON format metrics
- GET /metrics/prometheus - Prometheus format metrics
- GET /metrics/health - Metrics system health
- GET /metrics/summary - Summarized metrics

Protected endpoints (auth required):
- GET /metrics/routes - Detailed route metrics (authenticated users)
- POST /metrics/reset - Reset metrics (superuser only)
13"""
15from typing import Literal
17from fastapi import APIRouter, Depends, HTTPException, Query, Response
19from ..logging import get_structured_logger
20from .collector import MetricsCollector
21from .middleware import get_metrics_collector
# Module-level structured logger used by every endpoint handler below to
# report failures (and the metrics reset) before responding.
logger = get_structured_logger(__name__)
def create_metrics_router() -> APIRouter:
    """Create router with enhanced metrics endpoints.

    Every handler obtains the collector through the ``get_metrics_collector``
    dependency, logs any failure, and converts it into an HTTP 500 response
    whose detail carries the original error text.

    Returns:
        An ``APIRouter`` with prefix ``/metrics`` and the handlers registered.
    """
    router = APIRouter(prefix="/metrics", tags=["Metrics"])

    # Content type of the Prometheus text exposition format. A bare
    # "text/plain" omits the format version that scrapers use to select
    # the parser.
    prometheus_media_type = "text/plain; version=0.0.4; charset=utf-8"

    # No return annotation on purpose: this handler returns either a dict or
    # a raw Response, and FastAPI would interpret an annotation as the
    # response model.
    @router.get("/")
    async def get_metrics(
        format: Literal["json", "prometheus"] = Query(
            "json", description="Output format"
        ),
        collector: MetricsCollector = Depends(get_metrics_collector),
    ):
        """Get service metrics in JSON or Prometheus format.

        Args:
            format: Output format (json or prometheus)
            collector: Metrics collector dependency

        Returns:
            Metrics data in requested format
        """
        try:
            if format == "prometheus":
                content = collector.get_prometheus_metrics()
                return Response(content=content, media_type=prometheus_media_type)
            return collector.get_metrics()
        except Exception as e:
            logger.error(f"Error getting metrics: {e}")
            raise HTTPException(
                status_code=500, detail=f"Error retrieving metrics: {str(e)}"
            ) from e

    @router.get("/json")
    async def get_json_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get comprehensive service metrics in JSON format."""
        try:
            return collector.get_metrics()
        except Exception as e:
            logger.error(f"Error getting JSON metrics: {e}")
            raise HTTPException(
                status_code=500, detail=f"Error retrieving metrics: {str(e)}"
            ) from e

    @router.get("/prometheus")
    async def get_prometheus_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> Response:
        """Get metrics in Prometheus exposition format."""
        try:
            content = collector.get_prometheus_metrics()
            return Response(content=content, media_type=prometheus_media_type)
        except Exception as e:
            logger.error(f"Error getting Prometheus metrics: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Error retrieving Prometheus metrics: {str(e)}",
            ) from e

    @router.get("/health")
    async def get_metrics_health(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get health status of the metrics system.

        Returns:
            A dict with ``status`` ("healthy"/"unhealthy"), the list of
            detected ``issues``, and a few headline values copied from the
            collector's metrics.
        """
        try:
            metrics = collector.get_metrics()

            # Simple health-check logic: collect every detected problem; the
            # service is healthy only when none was found.
            health_issues = []

            # An error rate above 50% is considered unhealthy.
            if metrics["error_rate"] > 0.5:
                health_issues.append(f"High error rate: {metrics['error_rate']:.2%}")

            # No active routes and no requests at all is considered unhealthy
            # (the service is not receiving traffic).
            if metrics["active_routes"] == 0 and metrics["total_requests"] == 0:
                health_issues.append("No active routes or requests")

            return {
                "status": "unhealthy" if health_issues else "healthy",
                "service": metrics["service"],
                "uptime_seconds": metrics["uptime_seconds"],
                "total_requests": metrics["total_requests"],
                "error_rate": metrics["error_rate"],
                "active_routes": metrics["active_routes"],
                "issues": health_issues,
                "timestamp": metrics["timestamp"],
            }
        except Exception as e:
            logger.error(f"Error getting metrics health: {e}")
            raise HTTPException(
                status_code=500, detail=f"Error checking metrics health: {str(e)}"
            ) from e

    @router.get("/summary")
    async def get_metrics_summary(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get summarized metrics without detailed route information."""
        try:
            full_metrics = collector.get_metrics()

            # Return only the summary fields; per-route details are left out.
            return {
                "service": full_metrics["service"],
                "timestamp": full_metrics["timestamp"],
                "uptime_seconds": full_metrics["uptime_seconds"],
                "total_requests": full_metrics["total_requests"],
                "total_errors": full_metrics["total_errors"],
                "error_rate": full_metrics["error_rate"],
                "requests_per_second": full_metrics["requests_per_second"],
                "active_routes": full_metrics["active_routes"],
                "config": full_metrics["config"],
            }
        except Exception as e:
            logger.error(f"Error getting metrics summary: {e}")
            raise HTTPException(
                status_code=500,
                detail=f"Error retrieving metrics summary: {str(e)}",
            ) from e

    # NOTE(review): no authentication dependency is declared on this handler;
    # access control is presumably enforced upstream (e.g. at the API
    # gateway) -- confirm before relying on it.
    @router.get("/routes")
    async def get_route_metrics(
        route_filter: str | None = Query(None, description="Filter routes by pattern"),
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get detailed metrics for specific routes.

        This endpoint exposes internal route structure and performance
        metrics, so it is intended for authenticated users only.

        Args:
            route_filter: Optional case-insensitive substring; only routes
                whose key contains it are returned
            collector: Metrics collector dependency

        Returns:
            Detailed route metrics
        """
        try:
            full_metrics = collector.get_metrics()
            routes = full_metrics["routes"]

            if route_filter:
                # Simple pattern filtering: case-insensitive substring match
                # on the route key. The needle is lowered once, not per route.
                needle = route_filter.lower()
                routes = {
                    route_key: route_data
                    for route_key, route_data in routes.items()
                    if needle in route_key.lower()
                }

            return {
                "service": full_metrics["service"],
                "timestamp": full_metrics["timestamp"],
                "total_routes": len(routes),
                "routes": routes,
            }
        except Exception as e:
            logger.error(f"Error getting route metrics: {e}")
            raise HTTPException(
                status_code=500, detail=f"Error retrieving route metrics: {str(e)}"
            ) from e

    # NOTE(review): no superuser/authentication dependency is declared on
    # this handler; the restriction is presumably enforced upstream --
    # confirm before relying on it.
    @router.post("/reset")
    async def reset_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Reset all metrics.

        This is a destructive operation that clears all collected metrics
        data; it is intended for superusers only.

        Warning: This will permanently clear all collected metrics data.
        """
        try:
            collector.reset_metrics()
            # The same text is logged and returned to the caller.
            message = f"Metrics reset for service: {collector.service_name}"
            logger.info(message)
            return {
                "status": "success",
                "message": message,
                "timestamp": collector.start_time,
            }
        except Exception as e:
            logger.error(f"Error resetting metrics: {e}")
            raise HTTPException(
                status_code=500, detail=f"Error resetting metrics: {str(e)}"
            ) from e

    return router