Coverage for curator/actions/snapshot.py: 100%
181 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
1"""Snapshot and Restore action classes"""
2import logging
3import re
4from es_client.helpers.utils import ensure_list
5from curator.helpers.date_ops import parse_datemath, parse_date_pattern
6from curator.helpers.getters import get_indices
7from curator.helpers.testers import (
8 repository_exists, snapshot_running, verify_index_list, verify_repository, verify_snapshot_list
9)
10from curator.helpers.utils import report_failure, to_csv
11from curator.helpers.waiters import wait_for_it
12# pylint: disable=broad-except
13from curator.exceptions import (
14 ActionError, CuratorException, FailedRestore, FailedSnapshot, MissingArgument,
15 SnapshotInProgress
16 )
class Snapshot:
    """Action class that creates a snapshot of the indices held by an IndexList.

    Settings that share a name with arguments of
    :py:meth:`elasticsearch.client.SnapshotClient.create` are forwarded to that call unchanged.
    """

    def __init__(
        self, ilo, repository=None, name=None, ignore_unavailable=False,
        include_global_state=True, partial=False, wait_for_completion=True, wait_interval=9,
        max_wait=-1, skip_repo_fs_check=True
    ):
        """
        :param ilo: An IndexList Object
        :param repository: Repository name. Must name an existing repository.
        :param name: Snapshot name. Required; date patterns and date math in it are rendered.
        :param ignore_unavailable: Ignore unavailable shards/indices.
        :param include_global_state: Store cluster global state with snapshot.
        :param partial: Do not fail if primary shard is unavailable.
        :param wait_for_completion: Wait for completion before returning.
        :param wait_interval: Seconds to wait between completion checks.
        :param max_wait: Maximum number of seconds to ``wait_for_completion``
        :param skip_repo_fs_check: When true, :py:meth:`do_action` does not call
            ``verify_repository`` before creating the snapshot. (Default: ``True``)

        :type ilo: :py:class:`~.curator.indexlist.IndexList`
        :type repository: str
        :type name: str
        :type ignore_unavailable: bool
        :type include_global_state: bool
        :type partial: bool
        :type wait_for_completion: bool
        :type wait_interval: int
        :type max_wait: int
        :type skip_repo_fs_check: bool

        :raises ActionError: if ``repository`` does not exist.
        :raises MissingArgument: if ``name`` is empty or ``None``.
        """
        verify_index_list(ilo)
        # Nothing else is worth setting up when the index list holds no indices.
        ilo.empty_list_check()
        repo_is_present = repository_exists(ilo.client, repository=repository)
        if not repo_is_present:
            raise ActionError(f'Cannot snapshot indices to missing repository: {repository}')
        if not name:
            raise MissingArgument('No value for "name" provided.')

        self.loggit = logging.getLogger('curator.actions.snapshot')
        #: The :py:class:`~.curator.indexlist.IndexList` object passed from param ``ilo``
        self.index_list = ilo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`index_list`
        self.client = ilo.client
        # Render the date pattern first, then resolve any date math in the result.
        rendered_name = parse_date_pattern(name)
        #: The :py:func:`~.curator.helpers.date_ops.parse_date_pattern` rendered
        #: version of what was passed by param ``name``.
        self.name = parse_datemath(self.client, rendered_name)
        #: Object attribute that gets the value of param ``repository``.
        self.repository = repository
        #: Object attribute that tracks the snapshot state.
        self.state = None
        #: Object attribute that gets the value of param ``wait_for_completion``.
        self.wait_for_completion = wait_for_completion
        #: Object attribute that gets the value of param ``wait_interval``.
        self.wait_interval = wait_interval
        #: Object attribute that gets the value of param ``max_wait``.
        self.max_wait = max_wait
        #: Object attribute that gets the value of param ``skip_repo_fs_check``.
        self.skip_repo_fs_check = skip_repo_fs_check
        #: Object attribute that gets the value of param ``ignore_unavailable``.
        self.ignore_unavailable = ignore_unavailable
        #: Object attribute that gets the value of param ``include_global_state``.
        self.include_global_state = include_global_state
        #: Object attribute that gets the value of param ``partial``.
        self.partial = partial
        #: Object attribute that contains the :py:func:`~.curator.helpers.utils.to_csv` output of
        #: the indices in :py:attr:`index_list`.
        self.indices = to_csv(ilo.indices)
        #: Object attribute dictionary compiled from the indices of :py:attr:`index_list`,
        #: :py:attr:`ignore_unavailable`, :py:attr:`include_global_state`, and :py:attr:`partial`.
        #: Only read by :py:meth:`do_dry_run`.
        self.settings = {
            'indices': ilo.indices,
            'ignore_unavailable': self.ignore_unavailable,
            'include_global_state': self.include_global_state,
            'partial': self.partial
        }

    def get_state(self):
        """
        Fetch the state of the snapshot, store it in :py:attr:`state`, and return it.

        :raises CuratorException: if the lookup yields an empty snapshot list.
        """
        try:
            response = self.client.snapshot.get(repository=self.repository, snapshot=self.name)
            self.state = response['snapshots'][0]['state']
        except IndexError as exc:
            raise CuratorException(
                f'Snapshot "{self.name}" not found in repository "{self.repository}"') from exc
        return self.state

    def report_state(self):
        """
        Refresh :py:attr:`state` and log it.

        :raises FailedSnapshot: if :py:attr:`state` is anything other than ``SUCCESS``
        """
        self.get_state()
        if self.state != 'SUCCESS':
            msg = f'Snapshot {self.name} completed with state: {self.state}'
            self.loggit.error(msg)
            raise FailedSnapshot(msg)
        self.loggit.info('Snapshot %s successfully completed.', self.name)

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        summary = (
            f'DRY-RUN: snapshot: {self.name} in repository {self.repository} '
            f'with arguments: {self.settings}'
        )
        self.loggit.info(summary)

    def do_action(self):
        """
        :py:meth:`elasticsearch.client.SnapshotClient.create` a snapshot of :py:attr:`indices`,
        with passed parameters.

        :raises SnapshotInProgress: if another snapshot is already running.
        """
        if not self.skip_repo_fs_check:
            verify_repository(self.client, self.repository)
        if snapshot_running(self.client):
            raise SnapshotInProgress('Snapshot already in progress.')
        try:
            self.loggit.info(
                'Creating snapshot "%s" from indices: %s', self.name, self.index_list.indices)
            # The API call itself never blocks: completion polling, when requested, is
            # handled by 'wait_for_it' below.
            create_args = {
                'repository': self.repository,
                'snapshot': self.name,
                'ignore_unavailable': self.ignore_unavailable,
                'include_global_state': self.include_global_state,
                'indices': self.indices,
                'partial': self.partial,
                'wait_for_completion': False,
            }
            self.client.snapshot.create(**create_args)
            if not self.wait_for_completion:
                msg = (
                    f'"wait_for_completion" set to {self.wait_for_completion}. '
                    f'Remember to check for successful completion manually.'
                )
                self.loggit.warning(msg)
                return
            wait_for_it(
                self.client, 'snapshot', snapshot=self.name,
                repository=self.repository,
                wait_interval=self.wait_interval, max_wait=self.max_wait
            )
            self.report_state()
        except Exception as err:
            # Any failure, including a non-SUCCESS final state, is routed to report_failure.
            report_failure(err)
class DeleteSnapshots:
    """Action class that deletes every snapshot held by a SnapshotList."""

    def __init__(self, slo, retry_interval=120, retry_count=3):
        """
        :param slo: A SnapshotList object
        :type slo: :py:class:`~.curator.snapshotlist.SnapshotList`
        :param retry_interval: Seconds to delay between retries. (Default: ``120``)
        :type retry_interval: int
        :param retry_count: Number of attempts to make. (Default: ``3``)
        :type retry_count: int

        NOTE(review): ``retry_interval`` and ``retry_count`` are stored and echoed by
        :py:meth:`do_dry_run`, but nothing in this class reads them when deleting.
        """
        verify_snapshot_list(slo)
        self.loggit = logging.getLogger('curator.actions.delete_snapshots')
        #: The :py:class:`~.curator.snapshotlist.SnapshotList` object passed from param ``slo``
        self.snapshot_list = slo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`snapshot_list`
        self.client = slo.client
        #: Object attribute that gets its value from :py:attr:`snapshot_list`.
        self.repository = slo.repository
        #: Object attribute that gets the value of param ``retry_interval``.
        self.retry_interval = retry_interval
        #: Object attribute that gets the value of param ``retry_count``.
        self.retry_count = retry_count

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        would_use = dict(
            repository=self.repository,
            retry_interval=self.retry_interval,
            retry_count=self.retry_count,
        )
        for candidate in self.snapshot_list.snapshots:
            self.loggit.info(
                'DRY-RUN: delete_snapshot: %s with arguments: %s', candidate, would_use)

    def do_action(self):
        """
        :py:meth:`~.elasticsearch.client.SnapshotClient.delete` each snapshot in
        :py:attr:`snapshot_list`, one request per snapshot, from :py:attr:`repository`.

        Any exception raised while deleting is handed to ``report_failure``.
        """
        self.snapshot_list.empty_list_check()
        selected = self.snapshot_list.snapshots
        self.loggit.info(f'Deleting {len(selected)} selected snapshots: {selected}')
        try:
            for candidate in self.snapshot_list.snapshots:
                self.loggit.info('Deleting snapshot %s...', candidate)
                self.client.snapshot.delete(repository=self.repository, snapshot=candidate)
        # pylint: disable=broad-except
        except Exception as err:
            report_failure(err)
class Restore:
    """Restore Action Class

    Read more about identically named settings at:
    :py:meth:`elasticsearch.client.SnapshotClient.restore`
    """

    def __init__(
        self, slo, name=None, indices=None, include_aliases=False, ignore_unavailable=False,
        include_global_state=False, partial=False, rename_pattern=None,
        rename_replacement=None, extra_settings=None, wait_for_completion=True, wait_interval=9,
        max_wait=-1, skip_repo_fs_check=True
    ):
        """
        :param slo: A SnapshotList object
        :param name: Name of the snapshot to restore. If ``None``, use the most recent snapshot.
        :param indices: Indices to restore. If ``None``, all in the snapshot will be restored.
        :param include_aliases: Restore aliases with the indices.
        :param ignore_unavailable: Ignore unavailable shards/indices.
        :param include_global_state: Restore cluster global state with snapshot.
        :param partial: Do not fail if primary shard is unavailable. Also permits restoring
            from a snapshot whose state is ``PARTIAL``.
        :param rename_pattern: A regular expression pattern with one or more captures, e.g.
            ``index_(.+)``
        :param rename_replacement: A target index name pattern with `$#` numbered references to the
            captures in ``rename_pattern``, e.g. ``restored_index_$1``
        :param extra_settings: Index settings to apply to restored indices.
        :param wait_for_completion: Wait for completion before returning.
        :param wait_interval: Seconds to wait between completion checks.
        :param max_wait: Maximum number of seconds to ``wait_for_completion``
        :param skip_repo_fs_check: When true, :py:meth:`do_action` does not call
            ``verify_repository`` before restoring. (Default: ``True``)

        :type slo: :py:class:`~.curator.snapshotlist.SnapshotList`
        :type name: str
        :type indices: list
        :type include_aliases: bool
        :type ignore_unavailable: bool
        :type include_global_state: bool
        :type partial: bool
        :type rename_pattern: str
        :type rename_replacement: str
        :type extra_settings: dict
        :type wait_for_completion: bool
        :type wait_interval: int
        :type max_wait: int
        :type skip_repo_fs_check: bool

        :raises CuratorException: if the chosen snapshot's state is not ``SUCCESS`` (or
            ``PARTIAL`` with ``partial=True``).
        """
        if extra_settings is None:
            extra_settings = {}
        self.loggit = logging.getLogger('curator.actions.snapshot')
        verify_snapshot_list(slo)
        # Get the most recent snapshot.
        most_recent = slo.most_recent()
        self.loggit.debug('"most_recent" snapshot: %s', most_recent)
        #: Object attribute that gets the value of param ``name`` if not ``None``, or the output
        #: from :py:meth:`~.curator.SnapshotList.most_recent`.
        self.name = name if name else most_recent
        # Stop here now, if it's not a successful snapshot.
        snapshot_state = slo.snapshot_info[self.name]['state']
        if snapshot_state == 'PARTIAL' and partial:
            self.loggit.warning('Performing restore of snapshot in state PARTIAL.')
        elif snapshot_state != 'SUCCESS':
            raise CuratorException(
                'Restore operation can only be performed on snapshots with '
                'state "SUCCESS", or "PARTIAL" if partial=True.'
            )

        #: Internal reference to `slo`
        self.snapshot_list = slo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`snapshot_list`
        self.client = slo.client
        #: Object attribute that gets the value of ``repository`` from :py:attr:`snapshot_list`.
        self.repository = slo.repository

        #: The indices to restore: param ``indices`` as a list, or every index recorded for
        #: the snapshot when ``indices`` is empty or ``None``.
        if indices:
            self.indices = ensure_list(indices)
        else:
            self.indices = slo.snapshot_info[self.name]['indices']
        #: Object attribute that gets the value of param ``wait_for_completion``.
        self.wfc = wait_for_completion
        #: Object attribute that gets the value of param ``wait_interval``.
        self.wait_interval = wait_interval
        #: Object attribute that gets the value of param ``max_wait``.
        self.max_wait = max_wait

        # Each rename setting is normalised on its own value. Previously rename_pattern was
        # decided by testing rename_replacement, which dropped a lone pattern and left a
        # ``None`` pattern in place when only a replacement was supplied.
        normalized = self._normalize_rename(rename_pattern, rename_replacement)
        #: Object attribute that gets the value of param ``rename_pattern``. Empty :py:class:`str`
        #: if ``None``
        self.rename_pattern = normalized[0]
        #: Object attribute that gets the value of param ``rename_replacement``. Empty
        #: :py:class:`str` if ``None``
        self.rename_replacement = normalized[1]
        #: Object attribute derived from :py:attr:`rename_replacement`. but with Java regex group
        #: designations of ``$#`` converted to Python's ``\\#`` style.
        self.py_rename_replacement = normalized[2]
        #: Object attribute that gets the value of param ``skip_repo_fs_check``.
        self.skip_repo_fs_check = skip_repo_fs_check

        #: Object attribute that gets populated from other params/attributes. Deprecated, but not
        #: removed. Only used for logging here and in :py:meth:`do_dry_run`.
        self.body = {
            'indices' : self.indices,
            'include_aliases' : include_aliases,
            'ignore_unavailable' : ignore_unavailable,
            'include_global_state' : include_global_state,
            'partial' : partial,
            'rename_pattern' : self.rename_pattern,
            'rename_replacement' : self.rename_replacement,
        }
        #: Object attribute that gets the value of param ``include_aliases``.
        self.include_aliases = include_aliases
        #: Object attribute that gets the value of param ``ignore_unavailable``.
        self.ignore_unavailable = ignore_unavailable
        #: Object attribute that gets the value of param ``include_global_state``.
        self.include_global_state = include_global_state
        #: Object attribute that gets the value of param ``partial``.
        self.partial = partial
        #: Object attribute that gets the value of param ``extra_settings``, or ``None`` when
        #: no extra settings were supplied.
        self.index_settings = None

        if extra_settings:
            self.loggit.debug('Adding extra_settings to restore body: %s',extra_settings)
            self.index_settings = extra_settings
            try:
                self.body.update(extra_settings)
            except Exception:
                # Best effort: ``body`` is informational only, so a failure here is logged
                # rather than allowed to abort the restore setup.
                self.loggit.error('Unable to apply extra settings to restore body')
        self.loggit.debug('REPOSITORY: %s', self.repository)
        self.loggit.debug('WAIT_FOR_COMPLETION: %s', self.wfc)
        self.loggit.debug('SKIP_REPO_FS_CHECK: %s', self.skip_repo_fs_check)
        self.loggit.debug('BODY: %s', self.body)
        # Populate the expected output index list.
        self._get_expected_output()

    @staticmethod
    def _normalize_rename(rename_pattern, rename_replacement):
        """
        Normalise the two rename settings independently of one another.

        :returns: A 3-tuple ``(pattern, replacement, py_replacement)`` where ``None`` inputs
            become empty strings and ``py_replacement`` is ``replacement`` with every ``$``
            swapped for a backslash so it can be used with :py:func:`re.sub`.
        :rtype: tuple
        """
        pattern = rename_pattern if rename_pattern is not None else ''
        replacement = rename_replacement if rename_replacement is not None else ''
        return pattern, replacement, replacement.replace('$', '\\')

    def _is_renaming(self):
        """Return ``True`` if either rename setting is non-empty, i.e. names may change."""
        return bool(self.rename_pattern or self.rename_replacement)

    def _get_expected_output(self):
        """
        Populate ``expected_output`` with the index names the restore should produce:
        :py:attr:`indices` itself when no rename is configured, otherwise each index name
        passed through :py:func:`re.sub` with the rename settings.
        """
        if not self._is_renaming():
            self.expected_output = self.indices
            return # Don't stick around if we're not replacing anything
        self.expected_output = []
        for index in self.indices:
            renamed = re.sub(self.rename_pattern, self.py_rename_replacement, index)
            self.expected_output.append(renamed)
            self.loggit.debug(f'index: {index} replacement: {renamed}')

    def report_state(self):
        """
        Log the state of the restore. This should only be done if ``wait_for_completion`` is
        ``True``, and only after completing the restore.

        :raises FailedRestore: if any expected index is absent from the cluster's indices.
        """
        all_indices = get_indices(self.client)
        missing = []
        for index in self.expected_output:
            if index in all_indices:
                self.loggit.info('Found restored index %s', index)
            else:
                missing.append(index)
        if not missing:
            self.loggit.info('All indices appear to have been restored.')
            return
        msg = f'Some of the indices do not appear to have been restored. Missing: {missing}'
        self.loggit.error(msg)
        raise FailedRestore(msg)

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        args = {'wait_for_completion' : self.wfc, 'body' : self.body}
        msg = (
            f'DRY-RUN: restore: Repository: {self.repository} '
            f'Snapshot name: {self.name} Arguments: {args}'
        )
        self.loggit.info(msg)

        # Same rename test as _get_expected_output, so the dry run reports exactly the
        # names the real run would wait for.
        renaming = self._is_renaming()
        for index in self.indices:
            if renaming:
                rmsg = f'as {re.sub(self.rename_pattern, self.py_rename_replacement, index)}'
            else:
                rmsg = ''
            self.loggit.info('DRY-RUN: restore: Index %s %s', index, rmsg)

    def do_action(self):
        """
        :py:meth:`~.elasticsearch.client.SnapshotClient.restore` :py:attr:`indices` from
        :py:attr:`name` with passed params.

        :raises SnapshotInProgress: if a snapshot is currently running.
        """
        if not self.skip_repo_fs_check:
            verify_repository(self.client, self.repository)
        if snapshot_running(self.client):
            raise SnapshotInProgress('Cannot restore while a snapshot is in progress.')
        try:
            self.loggit.info('Restoring indices "%s" from snapshot: %s', self.indices, self.name)
            # Always set wait_for_completion to False. Let 'wait_for_it' do its
            # thing if wait_for_completion is set to True.
            self.client.snapshot.restore(
                repository=self.repository,
                snapshot=self.name,
                ignore_index_settings=None,
                ignore_unavailable=self.ignore_unavailable,
                include_aliases=self.include_aliases,
                include_global_state=self.include_global_state,
                index_settings=self.index_settings,
                indices=self.indices,
                partial=self.partial,
                rename_pattern=self.rename_pattern,
                rename_replacement=self.rename_replacement,
                wait_for_completion=False
            )
            if self.wfc:
                wait_for_it(
                    self.client, 'restore', index_list=self.expected_output,
                    wait_interval=self.wait_interval, max_wait=self.max_wait
                )
                self.report_state()
            else:
                msg = (
                    f'"wait_for_completion" set to {self.wfc}. '
                    f'Remember to check for successful completion manually.'
                )
                self.loggit.warning(msg)
        except Exception as err:
            # Any failure, including FailedRestore from report_state, goes to report_failure.
            report_failure(err)