Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1""" 

2Plotting of string "category" data: ``plot(['d', 'f', 'a'], [1, 2, 3])`` will 

3plot three points with x-axis values of 'd', 'f', 'a'. 

4 

5See :doc:`/gallery/lines_bars_and_markers/categorical_variables` for an 

6example. 

7 

8The module uses Matplotlib's `matplotlib.units` mechanism to convert from 

9strings to integers and provides a tick locator, a tick formatter, and the 

10`.UnitData` class that creates and stores the string-to-integer mapping. 

11""" 

12 

13from collections import OrderedDict 

14import dateutil.parser 

15import itertools 

16import logging 

17 

18import numpy as np 

19 

20from matplotlib import cbook, ticker, units 

21 

22 

23_log = logging.getLogger(__name__) 

24 

25 

class StrCategoryConverter(units.ConversionInterface):
    """Unit converter translating string categories into numeric positions."""

    @staticmethod
    def convert(value, unit, axis):
        """
        Translate the strings in *value* into floats via the mapping held by
        *unit*.

        Parameters
        ----------
        value : str or iterable
            A single value or a collection of values to translate.
        unit : `.UnitData`
            Object holding the string-to-integer mapping.
        axis : `~matplotlib.axis.Axis`
            Axis the converted values are drawn on.

            .. note:: *axis* is unused.

        Returns
        -------
        mapped_value : float or ndarray[float]

        Raises
        ------
        ValueError
            If *unit* is None.
        """
        if unit is None:
            raise ValueError(
                'Missing category information for StrCategoryConverter; '
                'this might be caused by unintendedly mixing categorical and '
                'numeric data')

        def _is_plain_number(item):
            # Number-like, but neither text nor bytes.
            return (units.ConversionInterface.is_numlike(item)
                    and not isinstance(item, (str, bytes)))

        # An object array keeps numeric entries as-is instead of
        # stringifying them.
        values = np.atleast_1d(np.array(value, dtype=object))
        # Purely numeric input is handed back as floats without consulting
        # the category mapping.
        if all(map(_is_plain_number, values)):
            return np.asarray(values, dtype=float)
        # Updating the unit registers unseen categories and type-checks the
        # entries at the same time.
        unit.update(values)
        lookup = np.vectorize(unit._mapping.__getitem__, otypes=[float])
        return lookup(values)

    @staticmethod
    def axisinfo(unit, axis):
        """
        Build the default tick locator and formatter for a category axis.

        Parameters
        ----------
        unit : `.UnitData`
            Object holding the string-to-integer mapping.
        axis : `~matplotlib.axis.Axis`
            Axis the information is requested for.

            .. note:: *axis* is unused.

        Returns
        -------
        axisinfo : `~matplotlib.units.AxisInfo`
            Carries the major locator and major formatter.
        """
        # Both helpers receive the very same dict object so that later
        # additions to the mapping are visible to them.
        mapping = unit._mapping
        locator = StrCategoryLocator(mapping)
        formatter = StrCategoryFormatter(mapping)
        return units.AxisInfo(majloc=locator, majfmt=formatter)

    @staticmethod
    def default_units(data, axis):
        """
        Create or extend the `.UnitData` attached to *axis*.

        Parameters
        ----------
        data : str or iterable of str
        axis : `~matplotlib.axis.Axis`
            Axis the data is plotted on.

        Returns
        -------
        class : `.UnitData`
            The unit object now stored on *axis*.
        """
        # Call order during conversion: default_units -> axisinfo -> convert.
        if axis.units is not None:
            axis.units.update(data)
        else:
            axis.set_units(UnitData(data))
        return axis.units

111 

112 

class StrCategoryLocator(ticker.Locator):
    """Place one tick at each integer the category mapping contains."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : Dict[str, int]
            Mapping from category to integer id; stored by reference.
        """
        self._units = units_mapping

    def __call__(self):
        """Return the integer ids currently in the mapping, as a list."""
        return [*self._units.values()]

    def tick_values(self, vmin, vmax):
        """Return the same ticks as calling the locator; limits are ignored."""
        return self()

128 

129 

class StrCategoryFormatter(ticker.Formatter):
    """Label each tick with the category string it stands for."""

    def __init__(self, units_mapping):
        """
        Parameters
        ----------
        units_mapping : Dict[str, int]
            Mapping from category to integer id; stored by reference.
        """
        self._units = units_mapping

    def __call__(self, x, pos=None):
        """
        Return the category label for the tick value *x*.

        *pos* is accepted but not used.
        """
        labels = self.format_ticks([x])
        return labels[0]

    def format_ticks(self, values):
        """
        Return one label per entry of *values*.

        Each value is rounded and looked up in the inverted mapping; values
        without a matching category yield an empty string.
        """
        # Invert the category -> id mapping on every call so that categories
        # added after construction are picked up.
        label_by_id = {}
        for category, ident in self._units.items():
            label_by_id[ident] = self._text(category)
        return [label_by_id.get(round(tick), '') for tick in values]

    @staticmethod
    def _text(value):
        """Return *value* as `str`, decoding bytes as UTF-8."""
        if isinstance(value, bytes):
            return value.decode(encoding='utf-8')
        if isinstance(value, str):
            return value
        return str(value)

160 

161 

class UnitData:
    """Ordered mapping from unique category values to integer ids."""

    def __init__(self, data=None):
        """
        Create mapping between unique categorical values and integer ids.

        Parameters
        ----------
        data : iterable, optional
            Sequence of string values.  When omitted, the mapping starts
            empty.
        """
        self._mapping = OrderedDict()
        # Source of the next unused integer id.
        self._counter = itertools.count()
        if data is not None:
            self.update(data)

    @staticmethod
    def _str_is_convertible(val):
        """
        Helper method to check whether a string can be parsed as float or date.
        """
        try:
            float(val)
        except ValueError:
            try:
                dateutil.parser.parse(val)
            except (ValueError, TypeError):
                # TypeError if dateutil >= 2.8.1 else ValueError
                return False
        return True

    def update(self, data):
        """
        Map new values to integer identifiers.

        Values already present keep their id; unseen values get the next
        id from the internal counter, in order of first appearance.

        Parameters
        ----------
        data : iterable
            Sequence of string values.

        Raises
        ------
        TypeError
            If a value in data is not a str or bytes instance.
        """
        data = np.atleast_1d(np.array(data, dtype=object))

        # Track whether *every* value could be parsed as a number or date.
        convertible = True
        for val in OrderedDict.fromkeys(data):
            # OrderedDict just iterates over unique values in data.
            cbook._check_isinstance((str, bytes), value=val)
            if convertible:
                # Only evaluated while no unparsable value has been seen.
                convertible = self._str_is_convertible(val)
            if val not in self._mapping:
                self._mapping[val] = next(self._counter)
        # Without the size check, empty input would leave ``convertible``
        # at its initial True and emit a misleading message about strings
        # that were never supplied.
        if data.size and convertible:
            _log.info('Using categorical units to plot a list of strings '
                      'that are all parsable as floats or dates. If these '
                      'strings should be plotted as numbers, cast to the '
                      'appropriate data type before plotting.')

223 

224 

# Register the converter with Matplotlib's unit framework.  Every key type
# receives its own converter instance.
for _category_type in (str, np.str_, bytes, np.bytes_):
    units.registry[_category_type] = StrCategoryConverter()
del _category_type  # keep the loop variable out of the module namespace