Coverage for curator/helpers/getters.py: 94%

135 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-20 21:00 -0600

1"""Utility functions that get things""" 

2# :pylint disable= 

3import logging 

4import re 

5from elasticsearch8 import exceptions as es8exc 

6from es_client.defaults import VERSION_MAX, VERSION_MIN 

7from es_client.builder import Builder 

8from curator.exceptions import ClientException, CuratorException, FailedExecution, MissingArgument 

9 

def byte_size(num, suffix='B'):
    """
    Render a byte count as a human-readable string using 1024-based units.

    :param num: The number of bytes
    :param suffix: An arbitrary suffix, like ``Bytes``

    :type num: int
    :type suffix: str

    :returns: A formatted string indicating the size in bytes, with the proper unit,
        e.g. KB, MB, GB, TB, etc.
    :rtype: str
    """
    scaled = num
    for prefix in ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z'):
        if abs(scaled) >= 1024.0:
            # Still too large for this unit; step up to the next one.
            scaled /= 1024.0
            continue
        return f'{scaled:3.1f}{prefix}{suffix}'
    # Anything that survives every division above is reported in 'Y' units.
    return f'{scaled:.1f}Y{suffix}'

27 

def get_alias_actions(oldidx, newidx, aliases):
    """
    Build the remove/add action pairs that move every alias from ``oldidx`` to ``newidx``.

    :param oldidx: The old index name
    :param newidx: The new index name
    :param aliases: The aliases (only the keys are used)

    :type oldidx: str
    :type newidx: str
    :type aliases: dict

    :returns: A list of actions suitable for
        :py:meth:`~.elasticsearch.client.IndicesClient.update_aliases` ``actions`` kwarg.
    :rtype: list
    """
    # For each alias: first remove it from the old index, then add it to the new one.
    return [
        {verb: {'index': index, 'alias': name}}
        for name in aliases.keys()
        for verb, index in (('remove', oldidx), ('add', newidx))
    ]

47 

def get_client(
        configdict=None, configfile=None, autoconnect=False, version_min=VERSION_MIN,
        version_max=VERSION_MAX):
    """Get an Elasticsearch Client using :py:class:`es_client.Builder`

    Build a client out of settings from `configfile` or `configdict`
    If neither `configfile` nor `configdict` is provided, empty defaults will be used.
    If both are provided, `configdict` will be used, and `configfile` ignored.

    :param configdict: A configuration dictionary
    :param configfile: A configuration file
    :param autoconnect: Connect to client automatically
    :param version_min: Minimum acceptable version of Elasticsearch (major, minor, patch)
    :param version_max: Maximum acceptable version of Elasticsearch (major, minor, patch)

    :type configdict: dict
    :type configfile: str
    :type autoconnect: bool
    :type version_min: tuple
    :type version_max: tuple

    :returns: A client connection object
    :rtype: :py:class:`~.elasticsearch.Elasticsearch`

    :raises ClientException: if ``Builder.connect()`` raises any exception
    """
    logger = logging.getLogger(__name__)
    logger.info('Creating client object and testing connection')

    builder_settings = {
        'configdict': configdict,
        'configfile': configfile,
        'autoconnect': autoconnect,
        'version_min': version_min,
        'version_max': version_max,
    }
    builder = Builder(**builder_settings)

    try:
        builder.connect()
    except Exception as exc:
        # Log the underlying cause, then surface it as the project's own exception type.
        logger.critical('Exception encountered: %s', exc)
        raise ClientException from exc

    return builder.client

87 

def get_data_tiers(client):
    """
    Get all valid data tiers from the node roles of each node in the cluster by polling each node

    :param client: A client connection object
    :type client: :py:class:`~.elasticsearch.Elasticsearch`

    :returns: The available data tiers in ``tier: bool`` form.
    :rtype: dict
    """
    tier_names = ('data_hot', 'data_warm', 'data_cold', 'data_frozen')
    nodes = client.nodes.info()['nodes']
    # Start with every tier absent; a tier is only ever flipped to True, never back.
    found = dict.fromkeys(tier_names, False)
    for node_id in nodes:
        for tier in tier_names:
            if tier in nodes[node_id]['roles']:
                found[tier] = True
    return found

110 

def get_frozen_prefix(oldidx, curridx):
    """
    Use a regular expression to extract the prefix from the current index, and then use
    that with ``partial-`` in front to name the resulting index.

    If there is no prefix, then we just send back ``partial-``

    :param oldidx: The index name before it was mounted in cold tier
    :param curridx: The current name of the index, as mounted in cold tier

    :type oldidx: str
    :type curridx: str

    :returns: The prefix to prepend the index name with for mounting as frozen
    :rtype: str

    :raises CuratorException: if ``curridx`` does not end with ``oldidx``
    """
    logger = logging.getLogger(__name__)
    # oldidx must be matched literally: index names routinely contain regex
    # metacharacters such as '.' and '+', which would otherwise change the pattern.
    regexp = re.compile(f'^(.*){re.escape(oldidx)}$')
    match = regexp.match(curridx)
    if match is None:
        # Without this guard the failure would surface as an opaque AttributeError.
        raise CuratorException(
            f'Unable to detect a prefix: index name "{curridx}" does not end with "{oldidx}"'
        )
    prefix = match.group(1)
    logger.debug('Detected match group for prefix: %s', prefix)
    # An empty prefix simply yields 'partial-'.
    return f'partial-{prefix}'

136 

def get_indices(client):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.get_settings`

    :param client: A client connection object
    :type client: :py:class:`~.elasticsearch.Elasticsearch`

    :returns: The current list of indices from the cluster
    :rtype: list

    :raises FailedExecution: if anything goes wrong while fetching or listing the indices
    """
    logger = logging.getLogger(__name__)
    try:
        settings = client.indices.get_settings(index='*', expand_wildcards='open,closed')
        # Iterating the response yields its keys, which are collected as the index list.
        names = [name for name in settings]
        logger.debug('All indices: %s', names)
    except Exception as err:
        raise FailedExecution(f'Failed to get indices. Error: {err}') from err
    return names

154 

def get_repository(client, repository=''):
    """
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get_repository`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str

    :returns: Configuration information for ``repository``.
    :rtype: dict

    :raises CuratorException: if the client raises a transport or not-found error
    """
    try:
        repo_info = client.snapshot.get_repository(name=repository)
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        raise CuratorException(
            f'Unable to get repository {repository}. Error: {err} Check Elasticsearch '
            f'logs for more information.'
        ) from err
    return repo_info

176 

def get_snapshot(client, repository=None, snapshot=''):
    """
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use
    :param snapshot: The snapshot name, or a comma-separated list of snapshots

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str
    :type snapshot: str

    :returns: Information about the provided ``snapshot``, a snapshot (or a comma-separated list of
        snapshots). If no snapshot specified, it will collect info for all snapshots. If none
        exist, an empty :py:class:`dict` will be returned.
    :rtype: dict

    :raises MissingArgument: if ``repository`` is empty or ``None``
    :raises FailedExecution: if the client raises a transport or not-found error
    """
    if not repository:
        raise MissingArgument('No value for "repository" provided')
    # An empty snapshot name means "all snapshots", so send the wildcard to the API
    # instead of the raw empty value.
    snapname = snapshot if snapshot else '*'
    try:
        return client.snapshot.get(repository=repository, snapshot=snapname)
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        msg = (
            f'Unable to get information about snapshot {snapname} from repository: '
            f'{repository}. Error: {err}'
        )
        raise FailedExecution(msg) from err

205 

def get_snapshot_data(client, repository=None):
    """
    Get all snapshots from repository and return a list.
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str

    :returns: The list of all snapshots from ``repository``
    :rtype: list

    :raises MissingArgument: if ``repository`` is empty or ``None``
    :raises FailedExecution: if the client raises a transport or not-found error
    """
    if not repository:
        raise MissingArgument('No value for "repository" provided')
    try:
        # The wildcard asks for every snapshot in the repository.
        response = client.snapshot.get(repository=repository, snapshot="*")
        return response['snapshots']
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        raise FailedExecution(
            f'Unable to get snapshot information from repository: '
            f'{repository}. Error: {err}'
        ) from err

230 

def get_tier_preference(client, target_tier='data_frozen'):
    """Do the tier preference thing in reverse order from coldest to hottest
    Based on the value of ``target_tier``, build out the list to use.

    :param client: A client connection object
    :param target_tier: The target data tier, e.g. data_warm.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type target_tier: str

    :returns: A suitable tier preference string in csv format
    :rtype: str
    """
    rank = {
        'data_content': 0,
        'data_hot': 1,
        'data_warm': 2,
        'data_cold': 3,
        'data_frozen': 4,
    }
    available = get_data_tiers(client)
    # Candidate tiers no colder than the target, already ordered coldest to hottest.
    candidates = [
        tier for tier in ('data_frozen', 'data_cold', 'data_warm', 'data_hot')
        if tier in available and rank[tier] <= rank[target_tier]
    ]
    # We're migrating to frozen here. If a frozen tier exists, frozen searchable snapshot
    # mounts should only ever go to the frozen tier.
    if target_tier == 'data_frozen' and 'data_frozen' in available and available['data_frozen']:
        return 'data_frozen'
    # Keep only the candidates that at least one node actually provides.
    preference = [tier for tier in candidates if available[tier]]
    # With no usable data tiers at all, fall back to 'data_content'. Otherwise join from
    # coldest to hottest as a csv string, e.g. 'data_cold,data_warm,data_hot'
    return ','.join(preference) if preference else 'data_content'

272 

def get_write_index(client, alias):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.get_alias`

    :param client: A client connection object
    :param alias: An alias name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type alias: str

    :returns: The index name associated with the alias that is designated ``is_write_index``.
        If the alias maps to exactly one index, that index is returned. If it maps to several
        indices that all carry a false ``is_write_index``, ``None`` is returned.
    :rtype: str

    :raises CuratorException: if the alias lookup fails or returns no indices
    :raises FailedExecution: if the alias maps to several indices, none is the write index,
        and at least one of them lacks the ``is_write_index`` setting
    """
    try:
        response = client.indices.get_alias(index=alias)
    except Exception as exc:
        raise CuratorException(f'Alias {alias} not found') from exc
    indices = list(response.keys())
    if not indices:
        # An empty response would otherwise surface as a bare IndexError below.
        raise CuratorException(f'Alias {alias} not found')
    if len(indices) == 1:
        # There's only one, so this is it
        return indices[0]
    # If there are more than one in the list, one needs to be the write index
    # otherwise the alias is a one to many, and can't do rollover.
    # Check every index before giving up, so an index without the flag listed ahead of
    # the real write index does not cause a spurious failure.
    first_missing = None
    for index in indices:
        try:
            if response[index]['aliases'][alias]['is_write_index']:
                return index
        except KeyError as exc:
            if first_missing is None:
                first_missing = exc
    if first_missing is not None:
        raise FailedExecution(
            'Invalid alias: is_write_index not found in 1 to many alias') from first_missing
    return None

303 

def index_size(client, idx, value='total'):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.stats`

    :param client: A client connection object
    :param idx: An index name
    :param value: One of either ``primaries`` or ``total``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type idx: str
    :type value: str

    :returns: The ``store.size_in_bytes`` figure reported for either the ``primaries`` or
        ``total`` shards of index ``idx``
    :rtype: integer
    """
    index_stats = client.indices.stats(index=idx)['indices'][idx]
    store = index_stats[value]['store']
    return store['size_in_bytes']

320 

def name_to_node_id(client, name):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param name: The node ``name``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type name: str

    :returns: The node_id of the node identified by ``name``, or ``None`` if no node matches
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    nodes = client.nodes.stats()['nodes']
    # The first node whose reported name matches wins.
    for node_id, node_stats in nodes.items():
        if node_stats['name'] != name:
            continue
        logger.debug('Found node_id "%s" for name "%s".', node_id, name)
        return node_id
    logger.error('No node_id found matching name: "%s"', name)
    return None

342 

def node_id_to_name(client, node_id):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: The name of the node identified by ``node_id``, or ``None`` if it is unknown
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    nodes = client.nodes.stats()['nodes']
    if node_id not in nodes:
        logger.error('No node_id found matching: "%s"', node_id)
        name = None
    else:
        name = nodes[node_id]['name']
    logger.debug('Name associated with node_id "%s": %s', node_id, name)
    return name

365 

def node_roles(client, node_id):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.info`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: The list of roles assigned to the node identified by ``node_id``
    :rtype: list
    """
    nodes = client.nodes.info()['nodes']
    node = nodes[node_id]
    return node['roles']

380 

def single_data_path(client, node_id):
    """
    In order for a shrink to work, it should be on a single filesystem, as shards cannot span
    filesystems. Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: ``True`` if the node has a single filesystem, else ``False``
    :rtype: bool
    """
    node_stats = client.nodes.stats()['nodes'][node_id]
    # Each entry under fs.data is counted as one filesystem/data path.
    data_paths = node_stats['fs']['data']
    return len(data_paths) == 1