Coverage for src / tracekit / reporting / chart_selection.py: 99%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-01-11 23:04 +0000

1"""Automated chart type selection for TraceKit reports. 

2 

3This module provides intelligent chart type selection based on data 

4characteristics to optimize data visualization in reports. 

5 

6 

7Example: 

8 >>> from tracekit.reporting import auto_select_chart 

9 >>> chart_type = auto_select_chart("time_series", (1000, 2)) 

10 >>> print(chart_type) # "line" 

11""" 

12 

13from __future__ import annotations 

14 

15from typing import TYPE_CHECKING, Literal 

16 

17import numpy as np 

18 

19if TYPE_CHECKING: 

20 from numpy.typing import NDArray 

21 

# Closed set of chart kinds the selection helpers in this module can return.
ChartType = Literal[
    "line",
    "scatter",
    "bar",
    "histogram",
    "heatmap",
    "pie",
    "spectrum",
]

23 

24 

def auto_select_chart(
    data_type: str,
    data_shape: tuple[int, ...],
    *,
    data: NDArray[np.float64] | None = None,
) -> ChartType:
    """Automatically select appropriate chart type based on data characteristics.

    Args:
        data_type: Type of data - one of:
            - "time_series": Time-domain waveform data
            - "frequency": Frequency-domain spectral data
            - "distribution": Statistical distribution data
            - "comparison": Comparative measurements
            - "correlation": Correlation or scatter data
            - "categorical": Categorical comparison data
            - "matrix": 2D matrix data
            - "parts": Part-to-whole relationships
            Any other value falls back to a choice based on ``data_shape``.
        data_shape: Shape of the data array (rows, [columns]).
        data: Optional actual data values. Only consulted for
            "categorical" data, where a small set of non-negative values
            with a positive, finite sum is treated as parts of a whole.
            Any array-like accepted by ``np.asarray`` works.

    Returns:
        Recommended chart type: 'line', 'scatter', 'bar', 'histogram',
        'heatmap', 'pie', or 'spectrum'.

    Example:
        >>> # Time series data → line plot
        >>> auto_select_chart("time_series", (1000, 2))
        'line'

        >>> # Distribution data → histogram
        >>> auto_select_chart("distribution", (500,))
        'histogram'

        >>> # Categorical comparison → bar chart
        >>> auto_select_chart("categorical", (5,))
        'bar'

        >>> # 2D matrix → heatmap
        >>> auto_select_chart("matrix", (100, 100))
        'heatmap'

    References:
        REPORT-028: Automated Chart Type Selection
    """
    # Time series → line plot
    if data_type == "time_series":
        return "line"

    # Frequency data → spectrum plot (log scale)
    if data_type == "frequency":
        return "spectrum"

    # Distribution → histogram
    if data_type == "distribution":
        return "histogram"

    # Categorical comparison → bar chart, or pie for a few parts of a whole
    if data_type == "categorical":
        if len(data_shape) > 0 and data_shape[0] <= 6 and data is not None:
            values = np.asarray(data)
            if np.all(values >= 0):
                # A pie chart needs a positive, finite total so that every
                # slice is a well-defined share. This replaces a former
                # self-comparison (allclose(x, x)) that could only fail for
                # non-finite values.
                total = np.sum(values)
                if total > 0 and np.isfinite(total):
                    return "pie"
        return "bar"

    # Comparison (continuous) → scatter plot
    if data_type == "comparison":
        # 2D data of moderate size stays readable as a scatter plot
        if len(data_shape) >= 2 and data_shape[0] < 10000:
            return "scatter"
        # 1D or very large comparison data → bar chart
        return "bar"

    # Correlation → scatter plot
    if data_type == "correlation":
        return "scatter"

    # 2D matrix → heatmap
    if data_type == "matrix":
        return "heatmap"

    # Parts-to-whole → pie chart
    if data_type == "parts":
        return "pie"

    # Unknown data type: decide from the shape alone
    if len(data_shape) == 1:
        # 1D data: bar for small sets, histogram otherwise
        if data_shape[0] < 20:
            return "bar"
        return "histogram"
    if len(data_shape) == 2:
        # 2D data: heatmap when both dimensions are large, else scatter
        if data_shape[0] > 50 and data_shape[1] > 50:
            return "heatmap"
        return "scatter"

    # Fallback (0-d or 3+ dimensional shapes) → line plot
    return "line"

126 

127 

def recommend_chart_with_reasoning(
    data_type: str,
    data_shape: tuple[int, ...],
    *,
    data: NDArray[np.float64] | None = None,
) -> dict[str, str | ChartType]:
    """Pick a chart type and attach a short human-readable justification.

    The chart type is chosen by ``auto_select_chart``; the justification is
    looked up from the chosen chart type (not from ``data_type``).

    Args:
        data_type: Type of data (see auto_select_chart).
        data_shape: Shape of the data array.
        data: Optional actual data array, forwarded to auto_select_chart.

    Returns:
        Dictionary with 'chart_type' and 'reasoning' keys.

    Example:
        >>> result = recommend_chart_with_reasoning("time_series", (1000, 2))
        >>> result['chart_type']
        'line'
        >>> result['reasoning']
        'Time series or sequential data best visualized with line plot'

    References:
        REPORT-028: Automated Chart Type Selection
    """
    selected = auto_select_chart(data_type, data_shape, data=data)

    # One canned explanation per chart type.
    explanations = {
        "line": "Time series or sequential data best visualized with line plot",
        "scatter": "Point data or correlation best shown with scatter plot",
        "bar": "Categorical or discrete comparison best shown with bar chart",
        "histogram": "Distribution data best represented as histogram",
        "heatmap": "2D matrix data best visualized as heatmap",
        "pie": "Part-to-whole relationship best shown with pie chart",
        "spectrum": "Frequency domain data best shown with log-scale spectrum plot",
    }

    if selected in explanations:
        explanation = explanations[selected]
    else:
        # Generic wording for a chart type without a dedicated entry.
        explanation = f"Data characteristics suggest {selected} visualization"

    return {"chart_type": selected, "reasoning": explanation}

173 

174 

def get_axis_scaling(
    data_type: str,
    data: NDArray[np.float64] | None = None,
) -> dict[str, str]:
    """Recommend axis scaling (linear vs log) based on data type and range.

    Both axes default to linear. "frequency" data gets log scaling on both
    axes. Independently of the data type, the y axis is switched to log when
    the strictly positive values in ``data`` span more than three orders of
    magnitude (largest / smallest positive value > 1000).

    Args:
        data_type: Type of data.
        data: Optional data values for range analysis. Any array-like
            accepted by ``np.asarray`` works, of any dimensionality. Empty
            input and input without positive values leave the scaling
            unchanged. Zero, negative and NaN entries are ignored when
            measuring the range.

    Returns:
        Dictionary with 'x_scale' and 'y_scale' keys ('linear' or 'log').

    Example:
        >>> get_axis_scaling("frequency")
        {'x_scale': 'log', 'y_scale': 'log'}

    References:
        REPORT-028: Automated Chart Type Selection
    """
    # Default linear scaling
    x_scale = "linear"
    y_scale = "linear"

    # Frequency data: both axes log
    if data_type == "frequency":
        x_scale = "log"
        y_scale = "log"

    # Check data range if provided
    if data is not None:
        values = np.asarray(data)
        # Only strictly positive values can be placed on a log axis, so the
        # dynamic range is measured on that subset. Selecting it first also
        # keeps min/max away from empty arrays (e.g. shape (3, 0)), where the
        # reductions would raise, and keeps NaN out of the comparison.
        positives = values[values > 0]
        if positives.size > 0:
            # If data spans > 3 orders of magnitude, use log scale
            if positives.max() / positives.min() > 1000:
                y_scale = "log"

    return {
        "x_scale": x_scale,
        "y_scale": y_scale,
    }

216 

217 

# Public API of this module.
__all__ = [
    "ChartType",
    "auto_select_chart",
    "get_axis_scaling",
    "recommend_chart_with_reasoning",
]