Coverage for curator/actions/snapshot.py: 100%

181 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-20 21:00 -0600

1"""Snapshot and Restore action classes""" 

2import logging 

3import re 

4from es_client.helpers.utils import ensure_list 

5from curator.helpers.date_ops import parse_datemath, parse_date_pattern 

6from curator.helpers.getters import get_indices 

7from curator.helpers.testers import ( 

8 repository_exists, snapshot_running, verify_index_list, verify_repository, verify_snapshot_list 

9) 

10from curator.helpers.utils import report_failure, to_csv 

11from curator.helpers.waiters import wait_for_it 

12# pylint: disable=broad-except 

13from curator.exceptions import ( 

14 ActionError, CuratorException, FailedRestore, FailedSnapshot, MissingArgument, 

15 SnapshotInProgress 

16 ) 

17 

class Snapshot:
    """Snapshot Action Class

    Creates a snapshot of the indices held by an ``IndexList`` in a named repository and,
    optionally, waits for it to finish and reports the final state.

    Read more about identically named settings at:
    :py:meth:`elasticsearch.client.SnapshotClient.create`
    """

    def __init__(
        self, ilo, repository=None, name=None, ignore_unavailable=False,
        include_global_state=True, partial=False, wait_for_completion=True, wait_interval=9,
        max_wait=-1, skip_repo_fs_check=True
    ):
        """
        :param ilo: An IndexList Object
        :param repository: Repository name.
        :param name: Snapshot name.
        :param ignore_unavailable: Ignore unavailable shards/indices.
        :param include_global_state: Store cluster global state with snapshot.
        :param partial: Do not fail if primary shard is unavailable.
        :param wait_for_completion: Wait for completion before returning.
        :param wait_interval: Seconds to wait between completion checks.
        :param max_wait: Maximum number of seconds to ``wait_for_completion``
        :param skip_repo_fs_check: Do not validate write access to repository on all cluster nodes
            before proceeding. Useful for shared filesystems where intermittent timeouts can affect
            validation, but won't likely affect snapshot success. (Default: ``True``)

        :type ilo: :py:class:`~.curator.indexlist.IndexList`
        :type repository: str
        :type name: str
        :type ignore_unavailable: bool
        :type include_global_state: bool
        :type partial: bool
        :type wait_for_completion: bool
        :type wait_interval: int
        :type max_wait: int
        :type skip_repo_fs_check: bool

        :raises ActionError: if ``repository`` does not exist.
        :raises MissingArgument: if ``name`` is empty or ``None``.
        """
        verify_index_list(ilo)
        # Run the empty-list check first: with no indices in the list there is no point in
        # validating the repository or the snapshot name.
        ilo.empty_list_check()
        if not repository_exists(ilo.client, repository=repository):
            raise ActionError(f'Cannot snapshot indices to missing repository: {repository}')
        if not name:
            raise MissingArgument('No value for "name" provided.')

        #: The :py:class:`~.curator.indexlist.IndexList` object passed from param ``ilo``
        self.index_list = ilo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`index_list`
        self.client = ilo.client
        # Date patterns are rendered first, then the result is passed through date math parsing.
        rendered_name = parse_date_pattern(name)
        #: The :py:func:`~.curator.helpers.date_ops.parse_date_pattern` rendered
        #: version of what was passed by param ``name``.
        self.name = parse_datemath(self.client, rendered_name)
        #: Object attribute that gets the value of param ``repository``.
        self.repository = repository
        #: Object attribute that gets the value of param ``wait_for_completion``.
        self.wait_for_completion = wait_for_completion
        #: Object attribute that gets the value of param ``wait_interval``.
        self.wait_interval = wait_interval
        #: Object attribute that gets the value of param ``max_wait``.
        self.max_wait = max_wait
        #: Object attribute that gets the value of param ``skip_repo_fs_check``.
        self.skip_repo_fs_check = skip_repo_fs_check
        #: Object attribute that tracks the snapshot state.
        self.state = None
        #: Object attribute that contains the :py:func:`~.curator.helpers.utils.to_csv` output of
        #: the indices in :py:attr:`index_list`.
        self.indices = to_csv(ilo.indices)
        #: Object attribute that gets the value of param ``ignore_unavailable``.
        self.ignore_unavailable = ignore_unavailable
        #: Object attribute that gets the value of param ``include_global_state``.
        self.include_global_state = include_global_state
        #: Object attribute that gets the value of param ``partial``.
        self.partial = partial
        #: Object attribute dictionary reported by :py:meth:`do_dry_run`. Holds the raw index
        #: list from ``ilo`` (not the CSV form in :py:attr:`indices`) together with
        #: :py:attr:`ignore_unavailable`, :py:attr:`include_global_state`, and :py:attr:`partial`
        self.settings = {
            'indices': ilo.indices,
            'ignore_unavailable': ignore_unavailable,
            'include_global_state': include_global_state,
            'partial': partial,
        }

        self.loggit = logging.getLogger('curator.actions.snapshot')

    def get_state(self):
        """
        Fetch the snapshot from the repository, store its state in :py:attr:`state`, and
        return it.

        :raises CuratorException: if the response contains no snapshot entries.
        """
        try:
            response = self.client.snapshot.get(repository=self.repository, snapshot=self.name)
            self.state = response['snapshots'][0]['state']
        except IndexError as exc:
            raise CuratorException(
                f'Snapshot "{self.name}" not found in repository "{self.repository}"') from exc
        return self.state

    def report_state(self):
        """
        Refresh :py:attr:`state` and log it. Raise :py:exc:`FailedSnapshot` if the state is
        anything other than ``SUCCESS``.
        """
        self.get_state()
        if self.state != 'SUCCESS':
            msg = f'Snapshot {self.name} completed with state: {self.state}'
            self.loggit.error(msg)
            raise FailedSnapshot(msg)
        self.loggit.info('Snapshot %s successfully completed.', self.name)

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        summary = f'DRY-RUN: snapshot: {self.name} in repository {self.repository} '
        summary += f'with arguments: {self.settings}'
        self.loggit.info(summary)

    def do_action(self):
        """
        :py:meth:`elasticsearch.client.SnapshotClient.create` a snapshot of :py:attr:`indices`,
        with passed parameters.

        :raises SnapshotInProgress: if a snapshot is already running.
        """
        if not self.skip_repo_fs_check:
            verify_repository(self.client, self.repository)
        if snapshot_running(self.client):
            raise SnapshotInProgress('Snapshot already in progress.')
        try:
            self.loggit.info(
                'Creating snapshot "%s" from indices: %s', self.name, self.index_list.indices)
            # The create call itself never blocks: wait_for_completion is always sent as
            # False, and 'wait_for_it' does the polling when the caller asked to wait.
            create_args = {
                'repository': self.repository,
                'snapshot': self.name,
                'ignore_unavailable': self.ignore_unavailable,
                'include_global_state': self.include_global_state,
                'indices': self.indices,
                'partial': self.partial,
                'wait_for_completion': False,
            }
            self.client.snapshot.create(**create_args)
            if not self.wait_for_completion:
                msg = (
                    f'"wait_for_completion" set to {self.wait_for_completion}. '
                    f'Remember to check for successful completion manually.'
                )
                self.loggit.warning(msg)
                return
            wait_for_it(
                self.client, 'snapshot', snapshot=self.name,
                repository=self.repository,
                wait_interval=self.wait_interval, max_wait=self.max_wait
            )
            self.report_state()
        except Exception as err:
            # Any failure above, including a non-SUCCESS final state, goes to report_failure.
            report_failure(err)

173 

class DeleteSnapshots:
    """Delete Snapshots Action Class"""

    def __init__(self, slo, retry_interval=120, retry_count=3):
        """
        :param slo: A SnapshotList object
        :type slo: :py:class:`~.curator.snapshotlist.SnapshotList`
        :param retry_interval: Seconds to delay between retries. (Default: ``120``)
        :type retry_interval: int
        :param retry_count: Number of attempts to make. (Default: ``3``)
        :type retry_count: int
        """
        verify_snapshot_list(slo)
        #: The :py:class:`~.curator.snapshotlist.SnapshotList` object passed from param ``slo``
        self.snapshot_list = slo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`snapshot_list`
        self.client = slo.client
        #: Object attribute that gets its value from :py:attr:`snapshot_list`.
        self.repository = slo.repository
        #: Object attribute that gets the value of param ``retry_interval``.
        self.retry_interval = retry_interval
        #: Object attribute that gets the value of param ``retry_count``.
        self.retry_count = retry_count
        self.loggit = logging.getLogger('curator.actions.delete_snapshots')

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        dry_run_args = dict(
            repository=self.repository,
            retry_interval=self.retry_interval,
            retry_count=self.retry_count,
        )
        for snapshot_name in self.snapshot_list.snapshots:
            self.loggit.info(
                'DRY-RUN: delete_snapshot: %s with arguments: %s', snapshot_name, dry_run_args)

    def do_action(self):
        """
        :py:meth:`~.elasticsearch.client.SnapshotClient.delete` each snapshot in
        :py:attr:`snapshot_list`, one request per snapshot.

        Any exception raised while deleting is handed to
        :py:func:`~.curator.helpers.utils.report_failure`.

        NOTE(review): :py:attr:`retry_interval` and :py:attr:`retry_count` are stored and shown
        by :py:meth:`do_dry_run`, but this method does not consult them; no retry is attempted
        here.
        """
        self.snapshot_list.empty_list_check()
        doomed = self.snapshot_list.snapshots
        self.loggit.info(f'Deleting {len(doomed)} selected snapshots: {doomed}')
        try:
            for snapshot_name in doomed:
                self.loggit.info('Deleting snapshot %s...', snapshot_name)
                self.client.snapshot.delete(repository=self.repository, snapshot=snapshot_name)
        # pylint: disable=broad-except
        except Exception as err:
            report_failure(err)

229 

class Restore(object):
    """Restore Action Class

    Restores indices from a snapshot in the repository of a ``SnapshotList`` and, optionally,
    waits for the restore to finish and verifies that the expected indices exist.

    Read more about identically named settings at:
    :py:meth:`elasticsearch.client.SnapshotClient.restore`
    """

    def __init__(
        self, slo, name=None, indices=None, include_aliases=False, ignore_unavailable=False,
        include_global_state=False, partial=False, rename_pattern=None,
        rename_replacement=None, extra_settings=None, wait_for_completion=True, wait_interval=9,
        max_wait=-1, skip_repo_fs_check=True
    ):
        """
        :param slo: A SnapshotList object
        :param name: Name of the snapshot to restore. If ``None``, use the most recent snapshot.
        :param indices: Indices to restore. If ``None``, all in the snapshot will be restored.
        :param include_aliases: Restore aliases with the indices.
        :param ignore_unavailable: Ignore unavailable shards/indices.
        :param include_global_state: Restore cluster global state with snapshot.
        :param partial: Do not fail if primary shard is unavailable.
        :param rename_pattern: A regular expression pattern with one or more captures, e.g.
            ``index_(.+)``. Only used when ``rename_replacement`` is also provided.
        :param rename_replacement: A target index name pattern with `$#` numbered references to the
            captures in ``rename_pattern``, e.g. ``restored_index_$1``. Only used when
            ``rename_pattern`` is also provided.
        :param extra_settings: Index settings to apply to restored indices.
        :param wait_for_completion: Wait for completion before returning.
        :param wait_interval: Seconds to wait between completion checks.
        :param max_wait: Maximum number of seconds to ``wait_for_completion``
        :param skip_repo_fs_check: Do not validate write access to repository on all cluster nodes
            before proceeding. Useful for shared filesystems where intermittent timeouts can affect
            validation, but won't likely affect snapshot success. (Default: ``True``)

        :type slo: :py:class:`~.curator.snapshotlist.SnapshotList`
        :type name: str
        :type indices: list
        :type include_aliases: bool
        :type ignore_unavailable: bool
        :type include_global_state: bool
        :type partial: bool
        :type rename_pattern: str
        :type rename_replacement: str
        :type extra_settings: dict
        :type wait_for_completion: bool
        :type wait_interval: int
        :type max_wait: int
        :type skip_repo_fs_check: bool

        :raises CuratorException: if the selected snapshot's state is not ``SUCCESS`` (or
            ``PARTIAL`` with ``partial=True``).
        """
        if extra_settings is None:
            extra_settings = {}
        self.loggit = logging.getLogger('curator.actions.snapshot')
        verify_snapshot_list(slo)
        # Get the most recent snapshot.
        most_recent = slo.most_recent()
        self.loggit.debug('"most_recent" snapshot: %s', most_recent)
        #: Object attribute that gets the value of param ``name`` if not ``None``, or the output
        #: from :py:meth:`~.curator.SnapshotList.most_recent`.
        self.name = name if name else most_recent
        # Stop here now, if it's not a successful snapshot.
        state = slo.snapshot_info[self.name]['state']
        if state == 'PARTIAL' and partial:
            self.loggit.warning('Performing restore of snapshot in state PARTIAL.')
        elif state != 'SUCCESS':
            raise CuratorException(
                'Restore operation can only be performed on snapshots with '
                'state "SUCCESS", or "PARTIAL" if partial=True.'
            )

        #: Internal reference to `slo`
        self.snapshot_list = slo
        #: The :py:class:`~.elasticsearch.Elasticsearch` client object derived from
        #: :py:attr:`snapshot_list`
        self.client = slo.client
        #: Object attribute that gets the value of ``repository`` from :py:attr:`snapshot_list`.
        self.repository = slo.repository

        #: Object attribute holding the indices to restore: param ``indices`` as a list, or
        #: every index recorded for the snapshot when ``indices`` is empty or ``None``.
        if indices:
            self.indices = ensure_list(indices)
        else:
            self.indices = slo.snapshot_info[self.name]['indices']
        #: Object attribute that gets the value of param ``wait_for_completion``.
        self.wfc = wait_for_completion
        #: Object attribute that gets the value of param ``wait_interval``.
        self.wait_interval = wait_interval
        #: Object attribute that gets the value of param ``max_wait``.
        self.max_wait = max_wait

        # Renaming needs both halves. A lone replacement used to leave rename_pattern as None,
        # which made re.sub() raise TypeError while computing the expected output.
        if (rename_pattern is None) != (rename_replacement is None):
            self.loggit.warning(
                'Both "rename_pattern" and "rename_replacement" are required to rename '
                'restored indices. Ignoring the one that was provided.'
            )
        pattern, replacement = self._normalize_rename(rename_pattern, rename_replacement)
        #: Object attribute that gets the value of param ``rename_pattern``. Empty :py:class:`str`
        #: unless both ``rename_pattern`` and ``rename_replacement`` were provided.
        self.rename_pattern = pattern
        #: Object attribute that gets the value of param ``rename_replacement``. Empty
        #: :py:class:`str` unless both ``rename_pattern`` and ``rename_replacement`` were provided.
        self.rename_replacement = replacement
        #: Object attribute derived from :py:attr:`rename_replacement`. but with Java regex group
        #: designations of ``$#`` converted to Python's ``\\#`` style.
        self.py_rename_replacement = self.rename_replacement.replace('$', '\\')
        #: Object attribute that gets the value of param ``skip_repo_fs_check``.
        self.skip_repo_fs_check = skip_repo_fs_check

        #: Object attribute that gets populated from other params/attributes. Deprecated, but not
        #: removed. Lazy way to keep from updating :py:meth:`do_dry_run`. Will fix later.
        self.body = {
            'indices': self.indices,
            'include_aliases': include_aliases,
            'ignore_unavailable': ignore_unavailable,
            'include_global_state': include_global_state,
            'partial': partial,
            'rename_pattern': self.rename_pattern,
            'rename_replacement': self.rename_replacement,
        }
        #: Object attribute that gets the value of param ``include_aliases``.
        self.include_aliases = include_aliases
        #: Object attribute that gets the value of param ``ignore_unavailable``.
        self.ignore_unavailable = ignore_unavailable
        #: Object attribute that gets the value of param ``include_global_state``.
        self.include_global_state = include_global_state
        #: Object attribute that gets the value of param ``partial``.
        self.partial = partial
        #: Object attribute that gets the value of param ``extra_settings``, or ``None`` when
        #: no extra settings were provided.
        self.index_settings = None

        if extra_settings:
            self.loggit.debug('Adding extra_settings to restore body: %s', extra_settings)
            self.index_settings = extra_settings
            try:
                self.body.update(extra_settings)
            except Exception:
                # Best effort: ``body`` only feeds the dry-run/debug output.
                self.loggit.error('Unable to apply extra settings to restore body')
        self.loggit.debug('REPOSITORY: %s', self.repository)
        self.loggit.debug('WAIT_FOR_COMPLETION: %s', self.wfc)
        self.loggit.debug('SKIP_REPO_FS_CHECK: %s', self.skip_repo_fs_check)
        self.loggit.debug('BODY: %s', self.body)
        #: Object attribute listing the index names expected to exist after the restore, i.e.
        #: :py:attr:`indices` with any rename applied.
        self.expected_output = []
        # Populate the expected output index list.
        self._get_expected_output()

    @staticmethod
    def _normalize_rename(rename_pattern, rename_replacement):
        """Return ``(pattern, replacement)`` when both are given, otherwise ``('', '')``."""
        if rename_pattern is None or rename_replacement is None:
            return '', ''
        return rename_pattern, rename_replacement

    def _get_expected_output(self):
        """
        Set :py:attr:`expected_output` to the index names the restore should produce, applying
        :py:attr:`rename_pattern` / :py:attr:`py_rename_replacement` to each of
        :py:attr:`indices` when a rename is configured.
        """
        if not self.rename_pattern and not self.rename_replacement:
            self.expected_output = self.indices
            return  # Don't stick around if we're not replacing anything
        self.expected_output = []
        for index in self.indices:
            renamed = re.sub(self.rename_pattern, self.py_rename_replacement, index)
            self.expected_output.append(renamed)
            msg = f'index: {index} replacement: {renamed}'
            self.loggit.debug(msg)

    def report_state(self):
        """
        Log the state of the restore. This should only be done if ``wait_for_completion`` is
        ``True``, and only after completing the restore.

        :raises FailedRestore: if any index in :py:attr:`expected_output` is not present in the
            cluster.
        """
        all_indices = get_indices(self.client)
        missing = []
        for index in self.expected_output:
            if index in all_indices:
                self.loggit.info('Found restored index %s', index)
            else:
                missing.append(index)
        if not missing:
            self.loggit.info('All indices appear to have been restored.')
        else:
            msg = f'Some of the indices do not appear to have been restored. Missing: {missing}'
            self.loggit.error(msg)
            raise FailedRestore(msg)

    def do_dry_run(self):
        """Log what the output would be, but take no action."""
        self.loggit.info('DRY-RUN MODE. No changes will be made.')
        args = {'wait_for_completion': self.wfc, 'body': self.body}
        msg = (
            f'DRY-RUN: restore: Repository: {self.repository} '
            f'Snapshot name: {self.name} Arguments: {args}'
        )
        self.loggit.info(msg)

        for index in self.indices:
            if self.rename_pattern and self.rename_replacement:
                rmsg = f'as {re.sub(self.rename_pattern, self.py_rename_replacement, index)}'
            else:
                rmsg = ''
            self.loggit.info('DRY-RUN: restore: Index %s %s', index, rmsg)

    def do_action(self):
        """
        :py:meth:`~.elasticsearch.client.SnapshotClient.restore` :py:attr:`indices` from
        :py:attr:`name` with passed params.

        :raises SnapshotInProgress: if a snapshot is currently running.
        """
        if not self.skip_repo_fs_check:
            verify_repository(self.client, self.repository)
        if snapshot_running(self.client):
            raise SnapshotInProgress('Cannot restore while a snapshot is in progress.')
        try:
            self.loggit.info('Restoring indices "%s" from snapshot: %s', self.indices, self.name)
            # Always set wait_for_completion to False. Let 'wait_for_it' do its
            # thing if wait_for_completion is set to True. Report the task_id
            # either way.
            self.client.snapshot.restore(
                repository=self.repository,
                snapshot=self.name,
                ignore_index_settings=None,
                ignore_unavailable=self.ignore_unavailable,
                include_aliases=self.include_aliases,
                include_global_state=self.include_global_state,
                index_settings=self.index_settings,
                indices=self.indices,
                partial=self.partial,
                rename_pattern=self.rename_pattern,
                rename_replacement=self.rename_replacement,
                wait_for_completion=False
            )
            if self.wfc:
                wait_for_it(
                    self.client, 'restore', index_list=self.expected_output,
                    wait_interval=self.wait_interval, max_wait=self.max_wait
                )
                self.report_state()
            else:
                msg = (
                    f'"wait_for_completion" set to {self.wfc}. '
                    f'Remember to check for successful completion manually.'
                )
                self.loggit.warning(msg)
        except Exception as err:
            # Any failure above, including FailedRestore, goes to report_failure.
            report_failure(err)