Coverage for curator/snapshotlist.py: 100%

206 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-16 15:27 -0600

1"""SnapshotList""" 

2import time 

3import re 

4import logging 

5from datetime import timedelta, datetime, date 

6from curator import exceptions, utils 

7from curator.defaults import settings 

8from curator.validators import SchemaCheck, filters 

9 

class SnapshotList(object):
    """Snapshot list object

    Collects the snapshots of one repository at construction time and offers
    filter methods that whittle `snapshots` down to the list an Action class
    should act on.
    """

    def __init__(self, client, repository=None):
        """
        :arg client: An Elasticsearch client object. It is checked with
            ``utils.verify_client_object``.
        :arg repository: The name of the snapshot repository. Required.

        Raises ``exceptions.MissingArgument`` if `repository` is empty,
        ``exceptions.FailedExecution`` if ``utils.repository_exists`` reports
        it cannot be found, and ``exceptions.NoSnapshots`` if the repository
        yields no snapshots.
        """
        utils.verify_client_object(client)
        if not repository:
            raise exceptions.MissingArgument('No value for "repository" provided')
        if not utils.repository_exists(client, repository):
            raise exceptions.FailedExecution(
                'Unable to verify existence of repository '
                '{0}'.format(repository)
            )
        self.loggit = logging.getLogger('curator.snapshotlist')
        #: An Elasticsearch Client object.
        #: Also accessible as an instance variable.
        self.client = client
        #: An Elasticsearch repository.
        #: Also accessible as an instance variable.
        self.repository = repository
        #: Instance variable.
        #: Information extracted from snapshots, such as age, etc.
        #: Populated by internal method `__get_snapshots` at instance creation
        #: time. **Type:** ``dict()``
        self.snapshot_info = {}
        #: Instance variable.
        #: The running list of snapshots which will be used by an Action class.
        #: Populated by internal methods `__get_snapshots` at instance creation
        #: time. **Type:** ``list()``
        self.snapshots = []
        #: Instance variable.
        #: Raw data dump of all snapshots in the repository at instance creation
        #: time. **Type:** ``list()`` of ``dict()`` data.
        self.all_snapshots = []
        self.__get_snapshots()

    def __actionable(self, snap):
        """Log that `snap` stays in the actionable list."""
        self.loggit.debug(
            'Snapshot {0} is actionable and remains in the list.'.format(snap))

    def __not_actionable(self, snap):
        """Log that `snap` is not actionable and remove it from `snapshots`."""
        self.loggit.debug(
            'Snapshot {0} is not actionable, removing from '
            'list.'.format(snap)
        )
        self.snapshots.remove(snap)

    def __excludify(self, condition, exclude, snap, msg=None):
        """
        Keep or remove `snap` depending on `condition` and `exclude`.

        A snapshot is removed when it matches and `exclude` is true, or when it
        does not match and `exclude` is false. Otherwise it is kept.

        :arg condition: Whether the snapshot matched the filter.
        :arg exclude: Whether matching snapshots are to be removed.
        :arg snap: The snapshot name.
        :arg msg: Optional detail appended to the debug log line.
        """
        # Removal happens exactly when the two flags agree in truthiness.
        if bool(condition) == bool(exclude):
            text = "Removed from actionable list"
            self.__not_actionable(snap)
        else:
            text = "Remains in actionable list"
            self.__actionable(snap)
        if msg:
            self.loggit.debug('{0}: {1}'.format(text, msg))

    def __prune_if_ageless(self, snapshot):
        """
        Remove `snapshot` from `snapshots` if it has no usable age.

        A missing `age_keyfield` entry is treated the same as a falsy one,
        which matches how `_sort_by_age` excludes such snapshots.

        :arg snapshot: The snapshot name.
        :rtype: bool -- ``True`` if the snapshot was removed.
        """
        if self.snapshot_info[snapshot].get(self.age_keyfield):
            return False
        self.loggit.debug(
            'Removing snapshot {0} for having no age'.format(snapshot))
        self.snapshots.remove(snapshot)
        return True

    def __get_snapshots(self):
        """
        Pull all snapshots into `snapshots` and populate
        `snapshot_info`
        """
        self.all_snapshots = utils.get_snapshot_data(self.client, self.repository)
        for list_item in self.all_snapshots:
            # Entries without a 'snapshot' key are skipped.
            if 'snapshot' in list_item:
                name = list_item['snapshot']
                self.snapshots.append(name)
                self.snapshot_info[name] = list_item
        self.empty_list_check()

    def __map_method(self, ftype):
        """
        Return the bound filter method registered for filtertype `ftype`.

        Raises ``KeyError`` for an unknown `ftype`.
        """
        methods = {
            'age': self.filter_by_age,
            'count': self.filter_by_count,
            'none': self.filter_none,
            'pattern': self.filter_by_regex,
            'period': self.filter_period,
            'state': self.filter_by_state,
        }
        return methods[ftype]

    def empty_list_check(self):
        """Raise exception if `snapshots` is empty"""
        if not self.snapshots:
            raise exceptions.NoSnapshots('snapshot_list object is empty.')

    def working_list(self):
        """
        Return the current value of `snapshots` as copy-by-value to prevent list
        stomping during iterations
        """
        # The filters remove entries from `snapshots` while looping, so they
        # must iterate over a copy rather than the live list.
        return self.snapshots[:]

    def _get_name_based_ages(self, timestring):
        """
        Add a snapshot age to `snapshot_info` based on the age as indicated
        by the snapshot name pattern, if it matches `timestring`. This is
        stored at key ``age_by_name``. Snapshots for which no epoch is found
        get ``None`` stored instead.

        :arg timestring: An strftime pattern
        """
        # Check for empty list before proceeding here to prevent non-iterable
        # condition
        self.empty_list_check()
        tstamp = utils.TimestringSearch(timestring)
        for snapshot in self.working_list():
            # Any falsy epoch is normalized to None.
            self.snapshot_info[snapshot]['age_by_name'] = (
                tstamp.get_epoch(snapshot) or None)

    def _calculate_ages(self, source='creation_date', timestring=None):
        """
        This method initiates snapshot age calculation based on the given
        parameters. Exceptions are raised when they are improperly configured.

        Set instance variable `age_keyfield` for use later, if needed.

        :arg source: Source of snapshot age. Can be 'name' or 'creation_date'.
        :arg timestring: An strftime string to match the datestamp in an
            snapshot name. Only used if ``source`` is ``name``.
        """
        if source == 'name':
            self.age_keyfield = 'age_by_name'
            if not timestring:
                raise exceptions.MissingArgument(
                    'source "name" requires the "timestring" keyword argument'
                )
            self._get_name_based_ages(timestring)
        elif source == 'creation_date':
            self.age_keyfield = 'start_time_in_millis'
        else:
            raise ValueError(
                'Invalid source: {0}. '
                'Must be "name", or "creation_date".'.format(source)
            )

    def _sort_by_age(self, snapshot_list, reverse=True):
        """
        Take a list of snapshots and sort them by date.

        By default, the youngest are first with `reverse=True`, but the oldest
        can be first by setting `reverse=False`

        Snapshots lacking the `age_keyfield` entry, or holding a falsy value
        for it, are removed from `snapshots` and left out of the result.

        :arg snapshot_list: The snapshot names to sort.
        :arg reverse: Sort descending by age value when true.
        :rtype: list
        """
        # Map each snapshot that has a usable age to that age.
        ages = {}
        for snap in snapshot_list:
            info = self.snapshot_info[snap]
            if self.age_keyfield not in info:
                msg = (
                    '{0} does not have age key "{1}" in SnapshotList '
                    ' metadata'.format(snap, self.age_keyfield)
                )
                self.__excludify(True, True, snap, msg)
            elif info[self.age_keyfield]:
                ages[snap] = info[self.age_keyfield]
            else:
                # This fixes #1366. Catch None is a potential age value.
                msg = ' snapshot %s has no age' % snap
                self.__excludify(True, True, snap, msg)

        # If reverse is True, this will sort so the youngest snapshots are
        # first. However, if you want oldest first, set reverse to False.
        return sorted(ages, key=ages.get, reverse=reverse)

    def most_recent(self):
        """
        Return the most recent snapshot based on `start_time_in_millis`.

        An empty string is returned if no snapshot has a time greater than
        zero.
        """
        self.empty_list_check()
        most_recent_time = 0
        most_recent_snap = ''
        for snapshot in self.snapshots:
            snaptime = utils.fix_epoch(
                self.snapshot_info[snapshot]['start_time_in_millis'])
            if snaptime > most_recent_time:
                most_recent_snap = snapshot
                most_recent_time = snaptime
        return most_recent_snap

    def filter_by_regex(self, kind=None, value=None, exclude=False):
        """
        Filter out snapshots not matching the pattern, or in the case of
        exclude, filter those matching the pattern.

        :arg kind: Can be one of: ``suffix``, ``prefix``, ``regex``, or
            ``timestring``. This option defines what kind of filter you will be
            building.
        :arg value: Depends on `kind`. It is the strftime string if `kind` is
            `timestring`. It's used to build the regular expression for other
            kinds.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            snapshots from `snapshots`. If `exclude` is `False`, then only
            matching snapshots will be kept in `snapshots`.
            Default is `False`
        """
        if kind not in ['regex', 'prefix', 'suffix', 'timestring']:
            raise ValueError('{0}: Invalid value for kind'.format(kind))

        # Stop here if None or empty value, but zero is okay
        if not value and value != 0:
            raise ValueError(
                '{0}: Invalid value for "value". '
                'Cannot be "None" type, empty, or False'.format(value)
            )

        if kind == 'timestring':
            regex = settings.regex_map()[kind].format(utils.get_date_regex(value))
        else:
            regex = settings.regex_map()[kind].format(value)

        self.empty_list_check()
        pattern = re.compile(regex)
        for snapshot in self.working_list():
            matched = bool(pattern.search(snapshot))
            self.loggit.debug('Filter by regex: Snapshot: {0}'.format(snapshot))
            self.__excludify(matched, exclude, snapshot)

    def filter_by_age(
            self, source='creation_date', direction=None,
            timestring=None, unit=None, unit_count=None, epoch=None, exclude=False
        ):
        """
        Remove snapshots from `snapshots` by relative age calculations.

        :arg source: Source of snapshot age. Can be 'name', or 'creation_date'.
        :arg direction: Time to filter, either ``older`` or ``younger``
        :arg timestring: An strftime string to match the datestamp in an
            snapshot name. Only used for snapshot filtering by ``name``.
        :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``,
            ``weeks``, ``months``, or ``years``.
        :arg unit_count: The number of ``unit`` (s). ``unit_count`` * ``unit`` will
            be calculated out to the relative number of seconds.
        :arg epoch: An epoch timestamp used in conjunction with ``unit`` and
            ``unit_count`` to establish a point of reference for calculations.
            If not provided, the current time will be used.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            snapshots from `snapshots`. If `exclude` is `False`, then only
            matching snapshots will be kept in `snapshots`.
            Default is `False`

        Snapshots with no usable age are removed from `snapshots` regardless
        of `exclude`.
        """
        self.loggit.debug('Starting filter_by_age')
        # Get timestamp point of reference, por
        por = utils.get_point_of_reference(unit, unit_count, epoch)
        self.loggit.debug('Point of Reference: {0}'.format(por))
        if not direction:
            raise exceptions.MissingArgument('Must provide a value for "direction"')
        if direction not in ['older', 'younger']:
            raise ValueError(
                'Invalid value for "direction": {0}'.format(direction)
            )
        self._calculate_ages(source=source, timestring=timestring)
        for snapshot in self.working_list():
            if self.__prune_if_ageless(snapshot):
                continue
            snapshot_age = utils.fix_epoch(
                self.snapshot_info[snapshot][self.age_keyfield])
            msg = (
                'Snapshot "{0}" age ({1}), direction: "{2}", point of '
                'reference, ({3})'.format(snapshot, snapshot_age, direction, por)
            )
            # Because time adds to epoch, smaller numbers are actually older
            # timestamps.
            if direction == 'older':
                agetest = snapshot_age < por
            else:  # 'younger'
                agetest = snapshot_age > por
            self.__excludify(agetest, exclude, snapshot, msg)

    def filter_by_state(self, state=None, exclude=False):
        """
        Filter out snapshots not matching ``state``, or in the case of exclude,
        filter those matching ``state``.

        :arg state: The snapshot state to filter for. Must be one of
            ``SUCCESS``, ``PARTIAL``, ``FAILED``, or ``IN_PROGRESS``. The value
            is compared case-insensitively.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            snapshots from `snapshots`. If `exclude` is `False`, then only
            matching snapshots will be kept in `snapshots`.
            Default is `False`

        Raises ``ValueError`` if `state` is not one of the accepted values.
        """
        # Normalize once so validation and comparison agree on case. Anything
        # without an `upper` method (e.g. None) fails the membership test.
        wanted = state.upper() if hasattr(state, 'upper') else state
        if wanted not in ['SUCCESS', 'PARTIAL', 'FAILED', 'IN_PROGRESS']:
            raise ValueError('{0}: Invalid value for state'.format(state))

        self.empty_list_check()
        for snapshot in self.working_list():
            self.loggit.debug('Filter by state: Snapshot: {0}'.format(snapshot))
            matched = self.snapshot_info[snapshot]['state'] == wanted
            self.__excludify(matched, exclude, snapshot)

    def filter_none(self):
        """No filter at all"""
        self.loggit.debug('"None" filter selected. No filtering will be done.')

    def filter_by_count(
            self, count=None, reverse=True, use_age=False,
            source='creation_date', timestring=None, exclude=True
        ):
        """
        Remove snapshots from the actionable list beyond the number `count`,
        sorted reverse-alphabetically by default. If you set `reverse` to
        `False`, it will be sorted alphabetically.

        The first `count` snapshots in sort order are the ones that match this
        filter. With the default `exclude=True` they are removed from the
        actionable list, and everything after them remains.

        The default is usually what you will want. If only one kind of snapshot
        is provided--for example, snapshots matching ``curator-%Y%m%d%H%M%S``--
        then reverse alphabetical sorting will mean the oldest will remain in
        the list, because lower numbers in the dates mean older snapshots.

        `use_age` allows ordering snapshots by age. Age is determined by the
        snapshot creation date (as identified by ``start_time_in_millis``) by
        default, but you can also specify a `source` of ``name``. The ``name``
        `source` requires the timestring argument.

        :arg count: Filter snapshots beyond `count`.
        :arg reverse: The filtering direction. (default: `True`).
        :arg use_age: Sort snapshots by age. ``source`` is required in this
            case.
        :arg source: Source of snapshot age. Can be one of ``name``, or
            ``creation_date``. Default: ``creation_date``
        :arg timestring: An strftime string to match the datestamp in a
            snapshot name. Only used if `source` ``name`` is selected.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            snapshots from `snapshots`. If `exclude` is `False`, then only
            matching snapshots will be kept in `snapshots`.
            Default is `True`
        """
        self.loggit.debug('Filtering snapshots by count')
        if not count:
            raise exceptions.MissingArgument('No value for "count" provided')

        # Create a copy-by-value working list
        working_list = self.working_list()

        if use_age:
            self._calculate_ages(source=source, timestring=timestring)
            # The caller's `reverse` is passed through to the age sort.
            sorted_snapshots = self._sort_by_age(working_list, reverse=reverse)
        else:
            # Default to sorting by snapshot name
            sorted_snapshots = sorted(working_list, reverse=reverse)

        for idx, snap in enumerate(sorted_snapshots, 1):
            msg = '{0} is {1} of specified count of {2}.'.format(snap, idx, count)
            self.__excludify(idx <= count, exclude, snap, msg)

    def filter_period(
            self, period_type='relative', source='name', range_from=None, range_to=None,
            date_from=None, date_to=None, date_from_format=None, date_to_format=None,
            timestring=None, unit=None, week_starts_on='sunday', epoch=None, exclude=False
        ):
        """
        Match `snapshots` with ages within a given period.

        :arg period_type: Can be either ``absolute`` or ``relative``. Default is
            ``relative``. ``date_from`` and ``date_to`` are required when using
            ``period_type='absolute'``. ``range_from`` and ``range_to`` are
            required with ``period_type='relative'``.
        :arg source: Source of snapshot age. Can be 'name', or 'creation_date'.
        :arg range_from: How many ``unit`` (s) in the past/future is the origin?
        :arg range_to: How many ``unit`` (s) in the past/future is the end point?
        :arg date_from: The simplified date for the start of the range
        :arg date_to: The simplified date for the end of the range. If this value
            is the same as ``date_from``, the full value of ``unit`` will be
            extrapolated for the range. For example, if ``unit`` is ``months``,
            and ``date_from`` and ``date_to`` are both ``2017.01``, then the entire
            month of January 2017 will be the absolute date range.
        :arg date_from_format: The strftime string used to parse ``date_from``
        :arg date_to_format: The strftime string used to parse ``date_to``
        :arg timestring: An strftime string to match the datestamp in an
            snapshot name. Only used for snapshot filtering by ``name``.
        :arg unit: One of ``hours``, ``days``, ``weeks``, ``months``, or
            ``years``.
        :arg week_starts_on: Either ``sunday`` or ``monday``. Default is
            ``sunday``
        :arg epoch: An epoch timestamp used to establish a point of reference
            for calculations. If not provided, the current time will be used.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            snapshots from `snapshots`. If `exclude` is `False`, then only
            matching snapshots will be kept in `snapshots`.
            Default is `False`

        Raises ``ValueError`` for an unknown `period_type` and
        ``exceptions.ConfigurationError`` when the arguments required by the
        chosen `period_type` are missing or not integers.
        """
        self.loggit.debug('Filtering snapshots by period')
        if period_type not in ['absolute', 'relative']:
            raise ValueError(
                'Unacceptable value: {0} -- "period_type" must be either '
                '"absolute" or "relative".'.format(period_type)
            )
        self.loggit.debug('period_type = {0}'.format(period_type))
        if period_type == 'relative':
            # int(None) raises TypeError and int('x') raises ValueError. Both
            # mean the range was not configured with integers.
            try:
                range_from = int(range_from)
                range_to = int(range_to)
            except (TypeError, ValueError) as err:
                raise exceptions.ConfigurationError(
                    '"range_from" and "range_to" must be integer values. '
                    'Error: {0}'.format(err))
            func = utils.date_range
            # Built after the conversion so the integer values are passed on.
            args = [unit, range_from, range_to, epoch]
            kwgs = {'week_starts_on': week_starts_on}
        else:
            if not all([date_from, date_to, date_from_format, date_to_format]):
                raise exceptions.ConfigurationError(
                    'Must provide "date_from", "date_to", '
                    '"date_from_format", and "date_to_format" with '
                    'absolute period_type'
                )
            func = utils.absolute_date_range
            args = [unit, date_from, date_to]
            kwgs = {
                'date_from_format': date_from_format,
                'date_to_format': date_to_format
            }
        try:
            start, end = func(*args, **kwgs)
        except Exception as err:
            # NOTE(review): this relies on utils.report_failure raising;
            # otherwise `start` and `end` are unbound below -- confirm.
            utils.report_failure(err)
        self._calculate_ages(source=source, timestring=timestring)
        for snapshot in self.working_list():
            if self.__prune_if_ageless(snapshot):
                continue
            age = utils.fix_epoch(self.snapshot_info[snapshot][self.age_keyfield])
            msg = (
                'Snapshot "{0}" age ({1}), period start: "{2}", period '
                'end, ({3})'.format(snapshot, age, start, end)
            )
            # Because time adds to epoch, smaller numbers are actually older
            # timestamps.
            inrange = start <= age <= end
            self.__excludify(inrange, exclude, snapshot, msg)

    def iterate_filters(self, config):
        """
        Iterate over the filters defined in `config` and execute them.

        The filter dictionaries in `config` are not modified.

        :arg config: A dictionary of filters, as extracted from the YAML
            configuration file.

        .. note:: `config` should be a dictionary with the following form:
        .. code-block:: python

                { 'filters' : [
                        {
                            'filtertype': 'the_filter_type',
                            'key1' : 'value1',
                            ...
                            'keyN' : 'valueN'
                        }
                    ]
                }

        """
        # Make sure we actually _have_ filters to act on
        if 'filters' not in config or not config['filters']:
            self.loggit.info('No filters in config. Returning unaltered object.')
            return

        self.loggit.debug('All filters: {0}'.format(config['filters']))
        for fltr in config['filters']:
            self.loggit.debug('Top of the loop: {0}'.format(self.snapshots))
            self.loggit.debug('Un-parsed filter args: {0}'.format(fltr))
            self.loggit.debug(
                'Parsed filter args: {0}'.format(
                    SchemaCheck(
                        fltr,
                        filters.structure(),
                        'filter',
                        'SnapshotList.iterate_filters'
                    ).result()
                )
            )
            method = self.__map_method(fltr['filtertype'])
            # Build the keyword arguments without 'filtertype' instead of
            # deleting the key, so the same config can be iterated again.
            kwargs = dict(
                (key, val) for key, val in fltr.items() if key != 'filtertype')
            self.loggit.debug('Filter args: {0}'.format(kwargs))
            self.loggit.debug('Pre-instance: {0}'.format(self.snapshots))
            method(**kwargs)
            self.loggit.debug('Post-instance: {0}'.format(self.snapshots))

539 self.loggit.debug('Post-instance: {0}'.format(self.snapshots))