Coverage for curator/helpers/getters.py: 94%
135 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
1"""Utility functions that get things"""
2# :pylint disable=
3import logging
4import re
5from elasticsearch8 import exceptions as es8exc
6from es_client.defaults import VERSION_MAX, VERSION_MIN
7from es_client.builder import Builder
8from curator.exceptions import ClientException, CuratorException, FailedExecution, MissingArgument
def byte_size(num, suffix='B'):
    """
    Render a byte count as a human-readable string using 1024-based units.

    :param num: The number of bytes
    :param suffix: An arbitrary suffix, like ``Bytes``

    :type num: int
    :type suffix: str

    :returns: ``num`` scaled down by powers of 1024 until its magnitude is below 1024, formatted
        with one decimal place and followed by the unit prefix and ``suffix``, e.g. ``1.5KB``.
    :rtype: str
    """
    remaining = num
    prefixes = ('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z')
    for prefix in prefixes:
        if abs(remaining) < 1024.0:
            return f'{remaining:3.1f}{prefix}{suffix}'
        remaining = remaining / 1024.0
    # Whatever is left after eight divisions is reported with the ``Y`` prefix
    return f'{remaining:.1f}Y{suffix}'
def get_alias_actions(oldidx, newidx, aliases):
    """
    Build the alias actions that move every alias from ``oldidx`` to ``newidx``.

    :param oldidx: The old index name
    :param newidx: The new index name
    :param aliases: The aliases; only its keys (the alias names) are used

    :type oldidx: str
    :type newidx: str
    :type aliases: dict

    :returns: A list of actions suitable for
        :py:meth:`~.elasticsearch.client.IndicesClient.update_aliases` ``actions`` kwarg. For
        each alias, a ``remove`` from ``oldidx`` is immediately followed by an ``add`` to
        ``newidx``.
    :rtype: list
    """
    # (verb, index) pairs in the order they must appear for each alias
    steps = (('remove', oldidx), ('add', newidx))
    return [
        {verb: {'index': target, 'alias': name}}
        for name in aliases.keys()
        for verb, target in steps
    ]
def get_client(
        configdict=None, configfile=None, autoconnect=False, version_min=VERSION_MIN,
        version_max=VERSION_MAX):
    """Get an Elasticsearch Client using :py:class:`es_client.Builder`

    Build a client out of settings from `configfile` or `configdict`
    If neither `configfile` nor `configdict` is provided, empty defaults will be used.
    If both are provided, `configdict` will be used, and `configfile` ignored.

    :param configdict: A configuration dictionary
    :param configfile: A configuration file
    :param autoconnect: Connect to client automatically
    :param version_min: Minimum acceptable version of Elasticsearch (major, minor, patch)
    :param version_max: Maximum acceptable version of Elasticsearch (major, minor, patch)

    :type configdict: dict
    :type configfile: str
    :type autoconnect: bool
    :type version_min: tuple
    :type version_max: tuple

    :returns: A client connection object
    :rtype: :py:class:`~.elasticsearch.Elasticsearch`

    :raises ClientException: if ``Builder.connect()`` raises anything; the original exception is
        logged at critical level and chained as the cause.
    """
    log = logging.getLogger(__name__)
    log.info('Creating client object and testing connection')

    settings = {
        'configdict': configdict,
        'configfile': configfile,
        'autoconnect': autoconnect,
        'version_min': version_min,
        'version_max': version_max,
    }
    es_builder = Builder(**settings)

    try:
        es_builder.connect()
    except Exception as exc:
        log.critical('Exception encountered: %s', exc)
        raise ClientException from exc

    return es_builder.client
def get_data_tiers(client):
    """
    Get all valid data tiers from the node roles of each node in the cluster by polling each node

    :param client: A client connection object
    :type client: :py:class:`~.elasticsearch.Elasticsearch`

    :returns: The available data tiers in ``tier: bool`` form. A tier is ``True`` when at least
        one node lists it among its roles.
    :rtype: dict
    """
    tier_roles = ('data_hot', 'data_warm', 'data_cold', 'data_frozen')
    nodes = client.nodes.info()['nodes']
    found = dict.fromkeys(tier_roles, False)
    for node_id in nodes:
        for tier in tier_roles:
            # Only ever flip a flag to True, so a later node cannot undo an earlier hit
            if tier in nodes[node_id]['roles']:
                found[tier] = True
    return found
def get_frozen_prefix(oldidx, curridx):
    """
    Extract whatever precedes ``oldidx`` at the end of ``curridx``, and then use that with
    ``partial-`` in front to name the resulting index.

    If there is no prefix, or ``curridx`` does not end with ``oldidx`` so that no prefix can be
    determined, then we just send back ``partial-``

    :param oldidx: The index name before it was mounted in cold tier
    :param curridx: The current name of the index, as mounted in cold tier

    :type oldidx: str
    :type curridx: str

    :returns: The prefix to prepend the index name with for mounting as frozen
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    # Index names routinely contain regex metacharacters (``.``, ``+``, ``[`` ...). Escape the
    # old name so it is matched literally instead of being interpreted as a pattern.
    match = re.match(f'^(.*){re.escape(oldidx)}$', curridx)
    if match is None:
        logger.warning(
            'Index name "%s" does not end with "%s"; using bare "partial-" prefix',
            curridx, oldidx
        )
        return 'partial-'
    prefix = match.group(1)
    logger.debug('Detected match group for prefix: %s', prefix)
    # An empty prefix naturally collapses to just ``partial-``
    return f'partial-{prefix}'
def get_indices(client):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.get_settings`

    :param client: A client connection object
    :type client: :py:class:`~.elasticsearch.Elasticsearch`

    :returns: The current list of indices from the cluster (open and closed), taken from the
        keys of the settings response
    :rtype: list

    :raises FailedExecution: if anything goes wrong while fetching or listing the indices
    """
    log = logging.getLogger(__name__)
    try:
        settings = client.indices.get_settings(index='*', expand_wildcards='open,closed')
        names = [name for name in settings]
        log.debug('All indices: %s', names)
    except Exception as err:
        raise FailedExecution(f'Failed to get indices. Error: {err}') from err
    return names
def get_repository(client, repository=''):
    """
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get_repository`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str

    :returns: Configuration information for ``repository``.
    :rtype: dict

    :raises CuratorException: if the client raises a transport or not-found error
    """
    try:
        repo_info = client.snapshot.get_repository(name=repository)
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        raise CuratorException(
            f'Unable to get repository {repository}. Error: {err} Check Elasticsearch '
            f'logs for more information.'
        ) from err
    return repo_info
def get_snapshot(client, repository=None, snapshot=''):
    """
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use
    :param snapshot: The snapshot name, or a comma-separated list of snapshots

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str
    :type snapshot: str

    :returns: Information about the provided ``snapshot``, a snapshot (or a comma-separated list of
        snapshots). If no snapshot specified, it will collect info for all snapshots. If none
        exist, an empty :py:class:`dict` will be returned.
    :rtype: dict

    :raises MissingArgument: if ``repository`` is empty or ``None``
    :raises FailedExecution: if the client raises a transport or not-found error
    """
    if not repository:
        raise MissingArgument('No value for "repository" provided')
    # An empty snapshot name means "all snapshots"; the wildcard is what must be sent to the
    # API, not the empty string.
    snapname = '*' if snapshot == '' else snapshot
    try:
        return client.snapshot.get(repository=repository, snapshot=snapname)
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        msg = (
            f'Unable to get information about snapshot {snapname} from repository: '
            f'{repository}. Error: {err}'
        )
        raise FailedExecution(msg) from err
def get_snapshot_data(client, repository=None):
    """
    Get all snapshots from repository and return a list.
    Calls :py:meth:`~.elasticsearch.client.SnapshotClient.get`

    :param client: A client connection object
    :param repository: The Elasticsearch snapshot repository to use

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type repository: str

    :returns: The list of all snapshots from ``repository``, i.e. the ``snapshots`` entry of the
        response to a ``*`` snapshot query
    :rtype: list

    :raises MissingArgument: if ``repository`` is empty or ``None``
    :raises FailedExecution: if the client raises a transport or not-found error
    """
    if not repository:
        raise MissingArgument('No value for "repository" provided')
    try:
        response = client.snapshot.get(repository=repository, snapshot="*")
        return response['snapshots']
    except (es8exc.TransportError, es8exc.NotFoundError) as err:
        raise FailedExecution(
            f'Unable to get snapshot information from repository: '
            f'{repository}. Error: {err}'
        ) from err
def get_tier_preference(client, target_tier='data_frozen'):
    """Do the tier preference thing in reverse order from coldest to hottest
    Based on the value of ``target_tier``, build out the list to use.

    :param client: A client connection object
    :param target_tier: The target data tier, e.g. data_warm.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type target_tier: str

    :returns: A suitable tier preference string in csv format
    :rtype: str
    """
    # Relative "temperature" of each tier: a larger number means a colder tier
    rank = {
        'data_content': 0,
        'data_hot': 1,
        'data_warm': 2,
        'data_cold': 3,
        'data_frozen': 4,
    }
    available = get_data_tiers(client)
    # Walk from coldest to hottest, keeping only tiers no colder than the target
    candidates = [
        tier for tier in ('data_frozen', 'data_cold', 'data_warm', 'data_hot')
        if tier in available and rank[tier] <= rank[target_tier]
    ]
    # We're migrating to frozen here. If a frozen tier exists, frozen searchable snapshot
    # mounts should only ever go to the frozen tier.
    if target_tier == 'data_frozen' and available.get('data_frozen'):
        return 'data_frozen'
    # Otherwise keep the candidates that some node actually provides, still coldest first
    preferred = [tier for tier in candidates if available.get(tier)]
    # With no usable data tier at all, the only option left is 'data_content'.
    # Otherwise join coldest to hottest as csv, e.g. 'data_cold,data_warm,data_hot'
    return ','.join(preferred) if preferred else 'data_content'
def get_write_index(client, alias):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.get_alias`

    :param client: A client connection object
    :param alias: An alias name

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type alias: str

    :returns: The index name associated with the alias that is designated ``is_write_index``.
        When the alias resolves to exactly one index, that index is returned regardless of
        the flag.
    :rtype: str

    :raises CuratorException: if the alias lookup fails or returns no indices
    :raises FailedExecution: if the alias resolves to several indices and none of them is
        flagged ``is_write_index``
    """
    try:
        response = client.indices.get_alias(index=alias)
    except Exception as exc:
        raise CuratorException(f'Alias {alias} not found') from exc
    indices = list(response.keys())
    if not indices:
        # Nothing came back at all: report it the same way as a failed lookup
        raise CuratorException(f'Alias {alias} not found')
    if len(indices) == 1:
        # There's only one, so this is it
        return indices[0]
    # If there are more than one in the list, one needs to be the write index
    # otherwise the alias is a one to many, and can't do rollover.
    # Check every index before giving up: indices that simply lack the flag must not
    # hide a later index that carries ``is_write_index: true``.
    for index in indices:
        alias_body = response[index].get('aliases', {}).get(alias, {})
        if alias_body.get('is_write_index'):
            return index
    raise FailedExecution('Invalid alias: is_write_index not found in 1 to many alias')
def index_size(client, idx, value='total'):
    """
    Calls :py:meth:`~.elasticsearch.client.IndicesClient.stats`

    :param client: A client connection object
    :param idx: An index name
    :param value: One of either ``primaries`` or ``total``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type idx: str
    :type value: str

    :returns: The ``store.size_in_bytes`` figure reported for either the ``primaries`` or
        ``total`` shards of index ``idx``
    :rtype: integer
    """
    response = client.indices.stats(index=idx)
    shard_group = response['indices'][idx][value]
    return shard_group['store']['size_in_bytes']
def name_to_node_id(client, name):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param name: The node ``name``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type name: str

    :returns: The node_id of the first node whose name equals ``name``, or ``None`` (after
        logging an error) when no node matches
    :rtype: str
    """
    log = logging.getLogger(__name__)
    nodes = client.nodes.stats()['nodes']
    for node_id, node_stats in nodes.items():
        if node_stats['name'] != name:
            continue
        log.debug('Found node_id "%s" for name "%s".', node_id, name)
        return node_id
    log.error('No node_id found matching name: "%s"', name)
    return None
def node_id_to_name(client, node_id):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: The name of the node identified by ``node_id``, or ``None`` (after logging an
        error) when the id is not present in the stats response
    :rtype: str
    """
    log = logging.getLogger(__name__)
    nodes = client.nodes.stats()['nodes']
    if node_id not in nodes:
        log.error('No node_id found matching: "%s"', node_id)
        found = None
    else:
        found = nodes[node_id]['name']
    # The result is logged at debug level whether or not the lookup succeeded
    log.debug('Name associated with node_id "%s": %s', node_id, found)
    return found
def node_roles(client, node_id):
    """
    Calls :py:meth:`~.elasticsearch.client.NodesClient.info`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: The list of roles assigned to the node identified by ``node_id``
    :rtype: list
    """
    all_nodes = client.nodes.info()['nodes']
    node_info = all_nodes[node_id]
    return node_info['roles']
def single_data_path(client, node_id):
    """
    In order for a shrink to work, it should be on a single filesystem, as shards cannot span
    filesystems. Calls :py:meth:`~.elasticsearch.client.NodesClient.stats`

    :param client: A client connection object
    :param node_id: The node ``node_id``

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type node_id: str

    :returns: ``True`` if the node's ``fs.data`` stats contain exactly one entry, else ``False``
    :rtype: bool
    """
    node_stats = client.nodes.stats()['nodes'][node_id]
    data_paths = node_stats['fs']['data']
    return len(data_paths) == 1