Coverage for curator/snapshotlist.py: 100%
206 statements
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-16 15:27 -0600
« prev ^ index » next coverage.py v7.3.0, created at 2023-08-16 15:27 -0600
1"""SnapshotList"""
2import time
3import re
4import logging
5from datetime import timedelta, datetime, date
6from curator import exceptions, utils
7from curator.defaults import settings
8from curator.validators import SchemaCheck, filters
10class SnapshotList(object):
11 """Snapshot list object"""
12 def __init__(self, client, repository=None):
13 utils.verify_client_object(client)
14 if not repository:
15 raise exceptions.MissingArgument('No value for "repository" provided')
16 if not utils.repository_exists(client, repository):
17 raise exceptions.FailedExecution(
18 'Unable to verify existence of repository '
19 '{0}'.format(repository)
20 )
21 self.loggit = logging.getLogger('curator.snapshotlist')
22 #: An Elasticsearch Client object.
23 #: Also accessible as an instance variable.
24 self.client = client
25 #: An Elasticsearch repository.
26 #: Also accessible as an instance variable.
27 self.repository = repository
28 #: Instance variable.
29 #: Information extracted from snapshots, such as age, etc.
30 #: Populated by internal method `__get_snapshots` at instance creation
31 #: time. **Type:** ``dict()``
32 self.snapshot_info = {}
33 #: Instance variable.
34 #: The running list of snapshots which will be used by an Action class.
35 #: Populated by internal methods `__get_snapshots` at instance creation
36 #: time. **Type:** ``list()``
37 self.snapshots = []
38 #: Instance variable.
39 #: Raw data dump of all snapshots in the repository at instance creation
40 #: time. **Type:** ``list()`` of ``dict()`` data.
41 self.__get_snapshots()
44 def __actionable(self, snap):
45 self.loggit.debug(
46 'Snapshot {0} is actionable and remains in the list.'.format(snap))
48 def __not_actionable(self, snap):
49 self.loggit.debug(
50 'Snapshot {0} is not actionable, removing from '
51 'list.'.format(snap)
52 )
53 self.snapshots.remove(snap)
55 def __excludify(self, condition, exclude, snap, msg=None):
56 if condition:
57 if exclude:
58 text = "Removed from actionable list"
59 self.__not_actionable(snap)
60 else:
61 text = "Remains in actionable list"
62 self.__actionable(snap)
63 else:
64 if exclude:
65 text = "Remains in actionable list"
66 self.__actionable(snap)
67 else:
68 text = "Removed from actionable list"
69 self.__not_actionable(snap)
70 if msg:
71 self.loggit.debug('{0}: {1}'.format(text, msg))
73 def __get_snapshots(self):
74 """
75 Pull all snapshots into `snapshots` and populate
76 `snapshot_info`
77 """
78 self.all_snapshots = utils.get_snapshot_data(self.client, self.repository)
79 for list_item in self.all_snapshots:
80 if 'snapshot' in list_item.keys():
81 self.snapshots.append(list_item['snapshot'])
82 self.snapshot_info[list_item['snapshot']] = list_item
83 self.empty_list_check()
85 def __map_method(self, ftype):
86 methods = {
87 'age': self.filter_by_age,
88 'count': self.filter_by_count,
89 'none': self.filter_none,
90 'pattern': self.filter_by_regex,
91 'period': self.filter_period,
92 'state': self.filter_by_state,
93 }
94 return methods[ftype]
96 def empty_list_check(self):
97 """Raise exception if `snapshots` is empty"""
98 if not self.snapshots:
99 raise exceptions.NoSnapshots('snapshot_list object is empty.')
101 def working_list(self):
102 """
103 Return the current value of `snapshots` as copy-by-value to prevent list
104 stomping during iterations
105 """
106 # Copy by value, rather than reference to prevent list stomping during
107 # iterations
108 return self.snapshots[:]
110 def _get_name_based_ages(self, timestring):
111 """
112 Add a snapshot age to `snapshot_info` based on the age as indicated
113 by the snapshot name pattern, if it matches `timestring`. This is
114 stored at key ``age_by_name``.
116 :arg timestring: An strftime pattern
117 """
118 # Check for empty list before proceeding here to prevent non-iterable
119 # condition
120 self.empty_list_check()
121 tstamp = utils.TimestringSearch(timestring)
122 for snapshot in self.working_list():
123 epoch = tstamp.get_epoch(snapshot)
124 if epoch:
125 self.snapshot_info[snapshot]['age_by_name'] = epoch
126 else:
127 self.snapshot_info[snapshot]['age_by_name'] = None
129 def _calculate_ages(self, source='creation_date', timestring=None):
130 """
131 This method initiates snapshot age calculation based on the given
132 parameters. Exceptions are raised when they are improperly configured.
134 Set instance variable `age_keyfield` for use later, if needed.
136 :arg source: Source of snapshot age. Can be 'name' or 'creation_date'.
137 :arg timestring: An strftime string to match the datestamp in an
138 snapshot name. Only used if ``source`` is ``name``.
139 """
140 if source == 'name':
141 self.age_keyfield = 'age_by_name'
142 if not timestring:
143 raise exceptions.MissingArgument(
144 'source "name" requires the "timestring" keyword argument'
145 )
146 self._get_name_based_ages(timestring)
147 elif source == 'creation_date':
148 self.age_keyfield = 'start_time_in_millis'
149 else:
150 raise ValueError(
151 'Invalid source: {0}. '
152 'Must be "name", or "creation_date".'.format(source)
153 )
155 def _sort_by_age(self, snapshot_list, reverse=True):
156 """
157 Take a list of snapshots and sort them by date.
159 By default, the youngest are first with `reverse=True`, but the oldest
160 can be first by setting `reverse=False`
161 """
162 # Do the age-based sorting here.
163 # First, build an temporary dictionary with just snapshot and age
164 # as the key and value, respectively
165 temp = {}
166 for snap in snapshot_list:
167 if self.age_keyfield in self.snapshot_info[snap]:
168 # This fixes #1366. Catch None is a potential age value.
169 if self.snapshot_info[snap][self.age_keyfield]:
170 temp[snap] = self.snapshot_info[snap][self.age_keyfield]
171 else:
172 msg = ' snapshot %s has no age' % snap
173 self.__excludify(True, True, snap, msg)
174 else:
175 msg = (
176 '{0} does not have age key "{1}" in SnapshotList '
177 ' metadata'.format(snap, self.age_keyfield)
178 )
179 self.__excludify(True, True, snap, msg)
181 # If reverse is True, this will sort so the youngest snapshots are
182 # first. However, if you want oldest first, set reverse to False.
183 # Effectively, this should set us up to act on everything older than
184 # meets the other set criteria.
185 # It starts as a tuple, but then becomes a list.
186 sorted_tuple = (
187 sorted(temp.items(), key=lambda k: k[1], reverse=reverse)
188 )
189 return [x[0] for x in sorted_tuple]
191 def most_recent(self):
192 """
193 Return the most recent snapshot based on `start_time_in_millis`.
194 """
195 self.empty_list_check()
196 most_recent_time = 0
197 most_recent_snap = ''
198 for snapshot in self.snapshots:
199 snaptime = utils.fix_epoch(
200 self.snapshot_info[snapshot]['start_time_in_millis'])
201 if snaptime > most_recent_time:
202 most_recent_snap = snapshot
203 most_recent_time = snaptime
204 return most_recent_snap
207 def filter_by_regex(self, kind=None, value=None, exclude=False):
208 """
209 Filter out snapshots not matching the pattern, or in the case of
210 exclude, filter those matching the pattern.
212 :arg kind: Can be one of: ``suffix``, ``prefix``, ``regex``, or
213 ``timestring``. This option defines what kind of filter you will be
214 building.
215 :arg value: Depends on `kind`. It is the strftime string if `kind` is
216 `timestring`. It's used to build the regular expression for other
217 kinds.
218 :arg exclude: If `exclude` is `True`, this filter will remove matching
219 snapshots from `snapshots`. If `exclude` is `False`, then only
220 matching snapshots will be kept in `snapshots`.
221 Default is `False`
222 """
223 if kind not in ['regex', 'prefix', 'suffix', 'timestring']:
224 raise ValueError('{0}: Invalid value for kind'.format(kind))
226 # Stop here if None or empty value, but zero is okay
227 if value == 0:
228 pass
229 elif not value:
230 raise ValueError(
231 '{0}: Invalid value for "value". '
232 'Cannot be "None" type, empty, or False'
233 )
235 if kind == 'timestring':
236 regex = settings.regex_map()[kind].format(utils.get_date_regex(value))
237 else:
238 regex = settings.regex_map()[kind].format(value)
240 self.empty_list_check()
241 pattern = re.compile(regex)
242 for snapshot in self.working_list():
243 match = pattern.search(snapshot)
244 self.loggit.debug('Filter by regex: Snapshot: {0}'.format(snapshot))
245 if match:
246 self.__excludify(True, exclude, snapshot)
247 else:
248 self.__excludify(False, exclude, snapshot)
250 def filter_by_age(
251 self, source='creation_date', direction=None,
252 timestring=None, unit=None, unit_count=None, epoch=None, exclude=False
253 ):
254 """
255 Remove snapshots from `snapshots` by relative age calculations.
257 :arg source: Source of snapshot age. Can be 'name', or 'creation_date'.
258 :arg direction: Time to filter, either ``older`` or ``younger``
259 :arg timestring: An strftime string to match the datestamp in an
260 snapshot name. Only used for snapshot filtering by ``name``.
261 :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``,
262 ``weeks``, ``months``, or ``years``.
263 :arg unit_count: The number of ``unit`` (s). ``unit_count`` * ``unit`` will
264 be calculated out to the relative number of seconds.
265 :arg epoch: An epoch timestamp used in conjunction with ``unit`` and
266 ``unit_count`` to establish a point of reference for calculations.
267 If not provided, the current time will be used.
268 :arg exclude: If `exclude` is `True`, this filter will remove matching
269 snapshots from `snapshots`. If `exclude` is `False`, then only
270 matching snapshots will be kept in `snapshots`.
271 Default is `False`
272 """
273 self.loggit.debug('Starting filter_by_age')
274 # Get timestamp point of reference, por
275 por = utils.get_point_of_reference(unit, unit_count, epoch)
276 self.loggit.debug('Point of Reference: {0}'.format(por))
277 if not direction:
278 raise exceptions.MissingArgument('Must provide a value for "direction"')
279 if direction not in ['older', 'younger']:
280 raise ValueError(
281 'Invalid value for "direction": {0}'.format(direction)
282 )
283 self._calculate_ages(source=source, timestring=timestring)
284 for snapshot in self.working_list():
285 if not self.snapshot_info[snapshot][self.age_keyfield]:
286 self.loggit.debug('Removing snapshot {0} for having no age')
287 self.snapshots.remove(snapshot)
288 continue
289 msg = (
290 'Snapshot "{0}" age ({1}), direction: "{2}", point of '
291 'reference, ({3})'.format(
292 snapshot,
293 utils.fix_epoch(self.snapshot_info[snapshot][self.age_keyfield]),
294 direction,
295 por
296 )
297 )
298 # Because time adds to epoch, smaller numbers are actually older
299 # timestamps.
300 snapshot_age = utils.fix_epoch(
301 self.snapshot_info[snapshot][self.age_keyfield])
302 if direction == 'older':
303 agetest = snapshot_age < por
304 else: # 'younger'
305 agetest = snapshot_age > por
306 self.__excludify(agetest, exclude, snapshot, msg)
308 def filter_by_state(self, state=None, exclude=False):
309 """
310 Filter out snapshots not matching ``state``, or in the case of exclude,
311 filter those matching ``state``.
313 :arg state: The snapshot state to filter for. Must be one of
314 ``SUCCESS``, ``PARTIAL``, ``FAILED``, or ``IN_PROGRESS``.
315 :arg exclude: If `exclude` is `True`, this filter will remove matching
316 snapshots from `snapshots`. If `exclude` is `False`, then only
317 matching snapshots will be kept in `snapshots`.
318 Default is `False`
319 """
320 if state.upper() not in ['SUCCESS', 'PARTIAL', 'FAILED', 'IN_PROGRESS']:
321 raise ValueError('{0}: Invalid value for state'.format(state))
323 self.empty_list_check()
324 for snapshot in self.working_list():
325 self.loggit.debug('Filter by state: Snapshot: {0}'.format(snapshot))
326 if self.snapshot_info[snapshot]['state'] == state:
327 self.__excludify(True, exclude, snapshot)
328 else:
329 self.__excludify(False, exclude, snapshot)
331 def filter_none(self):
332 """No filter at all"""
333 self.loggit.debug('"None" filter selected. No filtering will be done.')
335 def filter_by_count(
336 self, count=None, reverse=True, use_age=False,
337 source='creation_date', timestring=None, exclude=True
338 ):
339 """
340 Remove snapshots from the actionable list beyond the number `count`,
341 sorted reverse-alphabetically by default. If you set `reverse` to
342 `False`, it will be sorted alphabetically.
344 The default is usually what you will want. If only one kind of snapshot
345 is provided--for example, snapshots matching ``curator-%Y%m%d%H%M%S``--
346 then reverse alphabetical sorting will mean the oldest will remain in
347 the list, because lower numbers in the dates mean older snapshots.
349 By setting `reverse` to `False`, then ``snapshot3`` will be acted on
350 before ``snapshot2``, which will be acted on before ``snapshot1``
352 `use_age` allows ordering snapshots by age. Age is determined by the
353 snapshot creation date (as identified by ``start_time_in_millis``) by
354 default, but you can also specify a `source` of ``name``. The ``name``
355 `source` requires the timestring argument.
357 :arg count: Filter snapshots beyond `count`.
358 :arg reverse: The filtering direction. (default: `True`).
359 :arg use_age: Sort snapshots by age. ``source`` is required in this
360 case.
361 :arg source: Source of snapshot age. Can be one of ``name``, or
362 ``creation_date``. Default: ``creation_date``
363 :arg timestring: An strftime string to match the datestamp in a
364 snapshot name. Only used if `source` ``name`` is selected.
365 :arg exclude: If `exclude` is `True`, this filter will remove matching
366 snapshots from `snapshots`. If `exclude` is `False`, then only
367 matching snapshots will be kept in `snapshots`.
368 Default is `True`
369 """
370 self.loggit.debug('Filtering snapshots by count')
371 if not count:
372 raise exceptions.MissingArgument('No value for "count" provided')
374 # Create a copy-by-value working list
375 working_list = self.working_list()
377 if use_age:
378 self._calculate_ages(source=source, timestring=timestring)
379 # Using default value of reverse=True in self._sort_by_age()
380 sorted_snapshots = self._sort_by_age(working_list, reverse=reverse)
381 else:
382 # Default to sorting by snapshot name
383 sorted_snapshots = sorted(working_list, reverse=reverse)
385 idx = 1
386 for snap in sorted_snapshots:
387 msg = (
388 '{0} is {1} of specified count of {2}.'.format(
389 snap, idx, count
390 )
391 )
392 condition = True if idx <= count else False
393 self.__excludify(condition, exclude, snap, msg)
394 idx += 1
396 def filter_period(
397 self, period_type='relative', source='name', range_from=None, range_to=None,
398 date_from=None, date_to=None, date_from_format=None, date_to_format=None,
399 timestring=None, unit=None, week_starts_on='sunday', epoch=None, exclude=False
400 ):
401 """
402 Match `snapshots` with ages within a given period.
404 :arg period_type: Can be either ``absolute`` or ``relative``. Default is
405 ``relative``. ``date_from`` and ``date_to`` are required when using
406 ``period_type='absolute'`. ``range_from`` and ``range_to`` are
407 required with ``period_type='relative'`.
408 :arg source: Source of snapshot age. Can be 'name', or 'creation_date'.
409 :arg range_from: How many ``unit`` (s) in the past/future is the origin?
410 :arg range_to: How many ``unit`` (s) in the past/future is the end point?
411 :arg date_from: The simplified date for the start of the range
412 :arg date_to: The simplified date for the end of the range. If this value
413 is the same as ``date_from``, the full value of ``unit`` will be
414 extrapolated for the range. For example, if ``unit`` is ``months``,
415 and ``date_from`` and ``date_to`` are both ``2017.01``, then the entire
416 month of January 2017 will be the absolute date range.
417 :arg date_from_format: The strftime string used to parse ``date_from``
418 :arg date_to_format: The strftime string used to parse ``date_to``
419 :arg timestring: An strftime string to match the datestamp in an
420 snapshot name. Only used for snapshot filtering by ``name``.
421 :arg unit: One of ``hours``, ``days``, ``weeks``, ``months``, or
422 ``years``.
423 :arg week_starts_on: Either ``sunday`` or ``monday``. Default is
424 ``sunday``
425 :arg epoch: An epoch timestamp used to establish a point of reference
426 for calculations. If not provided, the current time will be used.
427 :arg exclude: If `exclude` is `True`, this filter will remove matching
428 indices from `indices`. If `exclude` is `False`, then only matching
429 indices will be kept in `indices`.
430 Default is `False`
431 """
433 self.loggit.debug('Filtering snapshots by period')
434 if period_type not in ['absolute', 'relative']:
435 raise ValueError(
436 'Unacceptable value: {0} -- "period_type" must be either '
437 '"absolute" or "relative".'.format(period_type)
438 )
439 self.loggit.debug('period_type = {0}'.format(period_type))
440 if period_type == 'relative':
441 func = utils.date_range
442 args = [unit, range_from, range_to, epoch]
443 kwgs = {'week_starts_on': week_starts_on}
444 try:
445 range_from = int(range_from)
446 range_to = int(range_to)
447 except ValueError as err:
448 raise exceptions.ConfigurationError(
449 '"range_from" and "range_to" must be integer values. Error: {0}'.format(err))
450 else:
451 func = utils.absolute_date_range
452 args = [unit, date_from, date_to]
453 kwgs = {
454 'date_from_format': date_from_format,
455 'date_to_format': date_to_format
456 }
457 for reqd in [date_from, date_to, date_from_format, date_to_format]:
458 if not reqd:
459 raise exceptions.ConfigurationError(
460 'Must provide "date_from", "date_to", '
461 '"date_from_format", and "date_to_format" with '
462 'absolute period_type'
463 )
464 try:
465 start, end = func(*args, **kwgs)
466 except Exception as err:
467 utils.report_failure(err)
468 self._calculate_ages(source=source, timestring=timestring)
469 for snapshot in self.working_list():
470 if not self.snapshot_info[snapshot][self.age_keyfield]:
471 self.loggit.debug('Removing snapshot {0} for having no age')
472 self.snapshots.remove(snapshot)
473 continue
474 age = utils.fix_epoch(self.snapshot_info[snapshot][self.age_keyfield])
475 msg = (
476 'Snapshot "{0}" age ({1}), period start: "{2}", period '
477 'end, ({3})'.format(
478 snapshot,
479 age,
480 start,
481 end
482 )
483 )
484 # Because time adds to epoch, smaller numbers are actually older
485 # timestamps.
486 inrange = ((age >= start) and (age <= end))
487 self.__excludify(inrange, exclude, snapshot, msg)
489 def iterate_filters(self, config):
490 """
491 Iterate over the filters defined in `config` and execute them.
495 :arg config: A dictionary of filters, as extracted from the YAML
496 configuration file.
498 .. note:: `config` should be a dictionary with the following form:
499 .. code-block:: python
501 { 'filters' : [
502 {
503 'filtertype': 'the_filter_type',
504 'key1' : 'value1',
505 ...
506 'keyN' : 'valueN'
507 }
508 ]
509 }
511 """
512 # Make sure we actually _have_ filters to act on
513 if not 'filters' in config or not config['filters']:
514 self.loggit.info('No filters in config. Returning unaltered object.')
515 return
517 self.loggit.debug('All filters: {0}'.format(config['filters']))
518 for fltr in config['filters']:
519 self.loggit.debug('Top of the loop: {0}'.format(self.snapshots))
520 self.loggit.debug('Un-parsed filter args: {0}'.format(fltr))
521 self.loggit.debug(
522 'Parsed filter args: {0}'.format(
523 SchemaCheck(
524 fltr,
525 filters.structure(),
526 'filter',
527 'SnapshotList.iterate_filters'
528 ).result()
529 )
530 )
531 method = self.__map_method(fltr['filtertype'])
532 # Remove key 'filtertype' from dictionary 'fltr'
533 del fltr['filtertype']
534 # If it's a filtertype with arguments, update the defaults with the
535 # provided settings.
536 self.loggit.debug('Filter args: {0}'.format(fltr))
537 self.loggit.debug('Pre-instance: {0}'.format(self.snapshots))
538 method(**fltr)
539 self.loggit.debug('Post-instance: {0}'.format(self.snapshots))