Coverage for src / mysingle / core / metrics / router.py: 0%

79 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-12-02 00:58 +0900

1"""Enhanced metrics router with comprehensive endpoints. 

2 

3Public endpoints (no auth required, configured in Kong): 

4- GET /metrics/ - Basic metrics (json/prometheus format) 

5- GET /metrics/json - JSON format metrics 

6- GET /metrics/prometheus - Prometheus format metrics 

7- GET /metrics/health - Metrics system health 

8- GET /metrics/summary - Summarized metrics 

9 

10Protected endpoints (auth required): 

11- GET /metrics/routes - Detailed route metrics (authenticated users) 

12- POST /metrics/reset - Reset metrics (superuser only) 

13""" 

14 

15from typing import Literal 

16 

17from fastapi import APIRouter, Depends, HTTPException, Query, Response 

18 

19from ..logging import get_structured_logger 

20from .collector import MetricsCollector 

21from .middleware import get_metrics_collector 

22 

23logger = get_structured_logger(__name__) 

24 

25 

def _metrics_failure(log_message: str, detail: str, exc: Exception) -> HTTPException:
    """Log a handler failure and build the matching HTTP 500 error.

    The exception is returned rather than raised so that each handler can
    write ``raise _metrics_failure(...) from exc`` and keep the original
    exception attached as ``__cause__``.

    Args:
        log_message: Prefix of the logged error line.
        detail: Prefix of the ``detail`` field sent back to the client.
        exc: The exception that interrupted the handler.

    Returns:
        An ``HTTPException`` with status code 500.
    """
    logger.error(f"{log_message}: {exc}")
    # NOTE(review): the exception text is echoed to the client, including on
    # the endpoints the module docstring lists as public — confirm that this
    # level of detail is acceptable to expose.
    return HTTPException(status_code=500, detail=f"{detail}: {str(exc)}")


def create_metrics_router() -> APIRouter:
    """Create router with enhanced metrics endpoints.

    Registers the following routes under the ``/metrics`` prefix:

    - ``GET /`` - metrics as JSON or Prometheus text (``format`` query param)
    - ``GET /json`` - metrics as JSON
    - ``GET /prometheus`` - metrics as Prometheus text
    - ``GET /health`` - health verdict derived from the collected metrics
    - ``GET /summary`` - top-level counters without per-route detail
    - ``GET /routes`` - per-route metrics, optionally filtered by substring
    - ``POST /reset`` - clear the collected metrics

    No handler declares an authentication dependency in this function.
    NOTE(review): access control for ``/routes`` and ``/reset`` is presumably
    enforced upstream (gateway or middleware) — confirm.

    Returns:
        The configured ``APIRouter``.
    """
    router = APIRouter(prefix="/metrics", tags=["Metrics"])

    # Keys copied verbatim from the full metrics payload into /summary.
    summary_keys = (
        "service",
        "timestamp",
        "uptime_seconds",
        "total_requests",
        "total_errors",
        "error_rate",
        "requests_per_second",
        "active_routes",
        "config",
    )

    @router.get("/")
    async def get_metrics(
        format: Literal["json", "prometheus"] = Query(
            "json", description="Output format"
        ),
        collector: MetricsCollector = Depends(get_metrics_collector),
    ):
        """Get service metrics in JSON or Prometheus format.

        Left without a return annotation on purpose: the handler returns
        either a dict or a ``Response``, and FastAPI would interpret a return
        annotation as the response model.

        Args:
            format: Output format (json or prometheus)
            collector: Metrics collector dependency

        Returns:
            Metrics data in requested format

        Raises:
            HTTPException: 500 if the collector fails.
        """
        try:
            if format == "prometheus":
                content = collector.get_prometheus_metrics()
                return Response(content=content, media_type="text/plain")
            return collector.get_metrics()
        except Exception as e:
            raise _metrics_failure(
                "Error getting metrics", "Error retrieving metrics", e
            ) from e

    @router.get("/json")
    async def get_json_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get comprehensive service metrics in JSON format.

        Raises:
            HTTPException: 500 if the collector fails.
        """
        try:
            return collector.get_metrics()
        except Exception as e:
            raise _metrics_failure(
                "Error getting JSON metrics", "Error retrieving metrics", e
            ) from e

    @router.get("/prometheus")
    async def get_prometheus_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> Response:
        """Get metrics in Prometheus exposition format.

        Raises:
            HTTPException: 500 if the collector fails.
        """
        try:
            content = collector.get_prometheus_metrics()
            return Response(content=content, media_type="text/plain")
        except Exception as e:
            raise _metrics_failure(
                "Error getting Prometheus metrics",
                "Error retrieving Prometheus metrics",
                e,
            ) from e

    @router.get("/health")
    async def get_metrics_health(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get health status of the metrics system.

        The status is ``"unhealthy"`` when the error rate is above 0.5, or
        when both the active route count and the total request count are
        zero; every triggered condition is listed under ``"issues"``.

        Raises:
            HTTPException: 500 if the collector fails or the metrics payload
                lacks an expected key.
        """
        try:
            metrics = collector.get_metrics()
            error_rate = metrics["error_rate"]
            health_issues = []

            # Unhealthy when the error rate exceeds 50%.
            if error_rate > 0.5:
                health_issues.append(f"High error rate: {error_rate:.2%}")

            # Unhealthy when there are no active routes and no requests have
            # been recorded (the service is not receiving traffic).
            if metrics["active_routes"] == 0 and metrics["total_requests"] == 0:
                health_issues.append("No active routes or requests")

            return {
                "status": "unhealthy" if health_issues else "healthy",
                "service": metrics["service"],
                "uptime_seconds": metrics["uptime_seconds"],
                "total_requests": metrics["total_requests"],
                "error_rate": error_rate,
                "active_routes": metrics["active_routes"],
                "issues": health_issues,
                "timestamp": metrics["timestamp"],
            }
        except Exception as e:
            raise _metrics_failure(
                "Error getting metrics health", "Error checking metrics health", e
            ) from e

    @router.get("/summary")
    async def get_metrics_summary(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get summarized metrics without detailed route information.

        Raises:
            HTTPException: 500 if the collector fails or the metrics payload
                lacks an expected key.
        """
        try:
            full_metrics = collector.get_metrics()
            # Return only the summary fields; per-route data is left out.
            return {key: full_metrics[key] for key in summary_keys}
        except Exception as e:
            raise _metrics_failure(
                "Error getting metrics summary",
                "Error retrieving metrics summary",
                e,
            ) from e

    @router.get("/routes")
    async def get_route_metrics(
        route_filter: str | None = Query(None, description="Filter routes by pattern"),
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Get detailed metrics for specific routes.

        This endpoint exposes internal route structure and performance
        metrics and is intended for authenticated users only.
        NOTE(review): no authentication dependency is declared on this
        handler — confirm that access is restricted elsewhere.

        Args:
            route_filter: Optional case-insensitive substring; only routes
                whose key contains it are returned. Empty or missing means
                no filtering.
            collector: Metrics collector dependency

        Returns:
            Service name, timestamp, number of routes returned and the
            per-route metrics themselves.

        Raises:
            HTTPException: 500 if the collector fails or the metrics payload
                lacks an expected key.
        """
        try:
            full_metrics = collector.get_metrics()
            routes = full_metrics["routes"]

            if route_filter:
                # Simple substring match; lower-case the filter once up front.
                needle = route_filter.lower()
                routes = {
                    route_key: route_data
                    for route_key, route_data in routes.items()
                    if needle in route_key.lower()
                }

            return {
                "service": full_metrics["service"],
                "timestamp": full_metrics["timestamp"],
                "total_routes": len(routes),
                "routes": routes,
            }
        except Exception as e:
            raise _metrics_failure(
                "Error getting route metrics", "Error retrieving route metrics", e
            ) from e

    @router.post("/reset")
    async def reset_metrics(
        collector: MetricsCollector = Depends(get_metrics_collector),
    ) -> dict:
        """Reset all metrics.

        This is a destructive operation that clears all collected metrics
        data and is intended for superusers only.
        NOTE(review): no superuser dependency is declared on this handler —
        confirm that access is restricted elsewhere.

        Warning: This will permanently clear all collected metrics data.

        Returns:
            A success status, a confirmation message and the collector's
            ``start_time`` as read after the reset.

        Raises:
            HTTPException: 500 if the reset fails.
        """
        try:
            collector.reset_metrics()
            message = f"Metrics reset for service: {collector.service_name}"
            logger.info(message)
            return {
                "status": "success",
                "message": message,
                "timestamp": collector.start_time,
            }
        except Exception as e:
            raise _metrics_failure(
                "Error resetting metrics", "Error resetting metrics", e
            ) from e

    return router