# curator/indexlist.py
from datetime import timedelta, datetime, date
import time
import re
import itertools
import logging
from elasticsearch7.exceptions import NotFoundError, TransportError
from curator import exceptions, utils
from curator.defaults import settings
from curator.validators import SchemaCheck, filters


class IndexList(object):
    def __init__(self, client):
        utils.verify_client_object(client)
        self.loggit = logging.getLogger('curator.indexlist')
        #: An Elasticsearch Client object.
        #: Also accessible as an instance variable.
        self.client = client
        #: Instance variable.
        #: Information extracted from indices, such as segment count, age, etc.
        #: Populated at instance creation time, and by other private helper
        #: methods, as needed. **Type:** ``dict()``
        self.index_info = {}
        #: Instance variable.
        #: The running list of indices which will be used by an Action class.
        #: Populated at instance creation time. **Type:** ``list()``
        self.indices = []
        #: Instance variable.
        #: All indices in the cluster at instance creation time.
        #: **Type:** ``list()``
        self.all_indices = []
        self.__get_indices()

    def __actionable(self, idx):
        self.loggit.debug(
            'Index {0} is actionable and remains in the list.'.format(idx))

    def __not_actionable(self, idx):
        self.loggit.debug(
            'Index {0} is not actionable, removing from list.'.format(idx))
        self.indices.remove(idx)

    def __excludify(self, condition, exclude, index, msg=None):
        if condition:
            if exclude:
                text = "Removed from actionable list"
                self.__not_actionable(index)
            else:
                text = "Remains in actionable list"
                self.__actionable(index)
        else:
            if exclude:
                text = "Remains in actionable list"
                self.__actionable(index)
            else:
                text = "Removed from actionable list"
                self.__not_actionable(index)
        if msg:
            self.loggit.debug('{0}: {1}'.format(text, msg))

    def __get_indices(self):
        """
        Pull all indices into `all_indices`, then populate `indices` and
        `index_info`
        """
        self.loggit.debug('Getting all indices')
        self.all_indices = utils.get_indices(self.client)
        self.indices = self.all_indices[:]
        if self.indices:
            for index in self.indices:
                self.__build_index_info(index)
            self._get_metadata()
            self._get_index_stats()

    def __build_index_info(self, index):
        """
        Ensure that `index` is a key in `index_info`. If not, create a
        sub-dictionary structure under that key.
        """
        self.loggit.debug(
            'Building preliminary index metadata for {0}'.format(index))
        if index not in self.index_info:
            self.index_info[index] = {
                "age": {},
                "number_of_replicas": 0,
                "number_of_shards": 0,
                "segments": 0,
                "size_in_bytes": 0,
                "docs": 0,
                "state": "",
            }

    def __map_method(self, ft):
        methods = {
            'alias': self.filter_by_alias,
            'age': self.filter_by_age,
            'allocated': self.filter_allocated,
            'closed': self.filter_closed,
            'count': self.filter_by_count,
            'empty': self.filter_empty,
            'forcemerged': self.filter_forceMerged,
            'ilm': self.filter_ilm,
            'kibana': self.filter_kibana,
            'none': self.filter_none,
            'opened': self.filter_opened,
            'period': self.filter_period,
            'pattern': self.filter_by_regex,
            'space': self.filter_by_space,
            'shards': self.filter_by_shards,
            'size': self.filter_by_size,
        }
        return methods[ft]

    def _get_index_stats(self):
        """
        Populate `index_info` with index `size_in_bytes`,
        `primary_size_in_bytes`, and doc count information for each index.
        """
        self.loggit.debug('Getting index stats')
        self.empty_list_check()

        # Subroutine to do the dirty work
        def iterate_over_stats(stats):
            for index in stats['indices']:
                size = stats['indices'][index]['total']['store']['size_in_bytes']
                docs = stats['indices'][index]['total']['docs']['count']
                primary_size = stats['indices'][index]['primaries']['store']['size_in_bytes']
                self.loggit.debug(
                    'Index: {0} Size: {1} Docs: {2} PrimarySize: {3}'.format(
                        index, utils.byte_size(size), docs,
                        utils.byte_size(primary_size)
                    )
                )
                self.index_info[index]['size_in_bytes'] = size
                self.index_info[index]['docs'] = docs
                self.index_info[index]['primary_size_in_bytes'] = primary_size

        working_list = self.working_list()
        for index in self.working_list():
            if self.index_info[index]['state'] == 'close':
                working_list.remove(index)
        if working_list:
            index_lists = utils.chunk_index_list(working_list)
            for l in index_lists:
                stats_result = {}
                try:
                    stats_result.update(self._get_indices_stats(l))
                except TransportError as err:
                    if err.status_code == 413:
                        self.loggit.debug(
                            'Huge Payload 413 Error - Trying to get '
                            'information with multiple requests'
                        )
                        stats_result = {}
                        stats_result.update(
                            self._bulk_queries(l, self._get_indices_stats))
                iterate_over_stats(stats_result)

    def _get_indices_stats(self, data):
        return self.client.indices.stats(
            index=utils.to_csv(data), metric='store,docs')

    def _bulk_queries(self, data, exec_func):
        slice_number = 10
        query_result = {}
        loop_number = (
            round(len(data) / slice_number)
            if round(len(data) / slice_number) > 0 else 1
        )
        self.loggit.debug(
            "Bulk Queries - number requests created: {0}".format(loop_number))
        for num in range(0, loop_number):
            if num == (loop_number - 1):
                data_sliced = data[num * slice_number:]
            else:
                data_sliced = data[num * slice_number:(num + 1) * slice_number]
            query_result.update(exec_func(data_sliced))
        return query_result
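
    # A quick sketch of the slicing arithmetic above (illustrative, not part
    # of the original module): with 25 index names and slice_number == 10,
    # round(25/10) == 2 requests are made. The first covers data[0:10] and
    # the final iteration sweeps up the remainder, data[10:], so no index is
    # ever dropped. A hypothetical fallback call might look like:
    #
    #   result = ilo._bulk_queries(ilo.indices, ilo._get_indices_stats)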

    def _get_cluster_state(self, data):
        return self.client.cluster.state(
            index=utils.to_csv(data), metric='metadata')['metadata']['indices']

    def _get_metadata(self):
        """
        Populate `index_info` with index settings metadata (creation date,
        replica and shard counts, state, and routing) for each index.
        """
        self.loggit.debug('Getting index metadata')
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = {}
            try:
                working_list.update(self._get_cluster_state(l))
            except TransportError as err:
                if err.status_code == 413:
                    self.loggit.debug(
                        'Huge Payload 413 Error - Trying to get information '
                        'with multiple requests'
                    )
                    working_list = {}
                    working_list.update(
                        self._bulk_queries(l, self._get_cluster_state))
            if working_list:
                for index in list(working_list.keys()):
                    s = self.index_info[index]
                    wl = working_list[index]
                    if 'settings' not in wl:
                        # Used by AWS ES <= 5.1
                        # We can try to get the same info from index/_settings.
                        # Workaround for https://github.com/elastic/curator/issues/880
                        alt_wl = self.client.indices.get(
                            index, feature='_settings')[index]
                        wl['settings'] = alt_wl['settings']
                    if 'creation_date' not in wl['settings']['index']:
                        self.loggit.warning(
                            'Index: {0} has no "creation_date"! This implies '
                            'that the index predates Elasticsearch v1.4. For '
                            'safety, this index will be removed from the '
                            'actionable list.'.format(index)
                        )
                        self.__not_actionable(index)
                    else:
                        s['age']['creation_date'] = (
                            utils.fix_epoch(wl['settings']['index']['creation_date'])
                        )
                    s['number_of_replicas'] = (
                        wl['settings']['index']['number_of_replicas']
                    )
                    s['number_of_shards'] = (
                        wl['settings']['index']['number_of_shards']
                    )
                    s['state'] = wl['state']
                    if 'routing' in wl['settings']['index']:
                        s['routing'] = wl['settings']['index']['routing']

    def empty_list_check(self):
        """Raise exception if `indices` is empty"""
        self.loggit.debug('Checking for empty list')
        if not self.indices:
            raise exceptions.NoIndices('index_list object is empty.')

    def working_list(self):
        """
        Return the current value of `indices` as a copy-by-value, to prevent
        list stomping during iterations
        """
        self.loggit.debug('Generating working list of indices')
        return self.indices[:]

    def _get_indices_segments(self, data):
        return self.client.indices.segments(
            index=utils.to_csv(data))['indices'].copy()

    def _get_segment_counts(self):
        """
        Populate `index_info` with segment information for each index.
        """
        self.loggit.debug('Getting index segment counts')
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = {}
            try:
                working_list.update(self._get_indices_segments(l))
            except TransportError as err:
                if err.status_code == 413:
                    self.loggit.debug(
                        'Huge Payload 413 Error - Trying to get information '
                        'with multiple requests'
                    )
                    working_list = {}
                    working_list.update(
                        self._bulk_queries(l, self._get_indices_segments))
            if working_list:
                for index in list(working_list.keys()):
                    shards = working_list[index]['shards']
                    segmentcount = 0
                    for shardnum in shards:
                        for shard in range(0, len(shards[shardnum])):
                            segmentcount += (
                                shards[shardnum][shard]['num_search_segments']
                            )
                    self.index_info[index]['segments'] = segmentcount

    def _get_name_based_ages(self, timestring):
        """
        Add indices to `index_info` based on the age as indicated by the index
        name pattern, if it matches `timestring`

        :arg timestring: A strftime pattern
        """
        # Check for empty list before proceeding here to prevent non-iterable
        # condition
        self.loggit.debug('Getting ages of indices by "name"')
        self.empty_list_check()
        ts = utils.TimestringSearch(timestring)
        for index in self.working_list():
            epoch = ts.get_epoch(index)
            if isinstance(epoch, int):
                self.index_info[index]['age']['name'] = epoch

    def _get_field_stats_dates(self, field='@timestamp'):
        """
        Add indices to `index_info` based on the values the queries return,
        as determined by the min and max aggregated values of `field`

        :arg field: The field with the date value. The field must be mapped in
            elasticsearch as a date datatype. Default: ``@timestamp``
        """
        self.loggit.debug(
            'Cannot query closed indices. Omitting any closed indices.'
        )
        self.filter_closed()
        self.loggit.debug(
            'Cannot use field_stats with empty indices. Omitting any empty '
            'indices.'
        )
        self.filter_empty()
        self.loggit.debug(
            'Getting index date by querying indices for min & max value of '
            '{0} field'.format(field)
        )
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            for index in l:
                body = {
                    'aggs': {
                        'min': {'min': {'field': field}},
                        'max': {'max': {'field': field}}
                    }
                }
                response = self.client.search(index=index, size=0, body=body)
                self.loggit.debug('RESPONSE: {0}'.format(response))
                if response:
                    try:
                        r = response['aggregations']
                        self.loggit.debug('r: {0}'.format(r))
                        s = self.index_info[index]['age']
                        s['min_value'] = utils.fix_epoch(r['min']['value'])
                        s['max_value'] = utils.fix_epoch(r['max']['value'])
                        self.loggit.debug('s: {0}'.format(s))
                    except KeyError:
                        raise exceptions.ActionError(
                            'Field "{0}" not found in index '
                            '"{1}"'.format(field, index)
                        )

    def _calculate_ages(self, source=None, timestring=None, field=None,
                        stats_result=None):
        """
        This method initiates index age calculation based on the given
        parameters. Exceptions are raised when they are improperly configured.

        Set instance variable `age_keyfield` for use later, if needed.

        :arg source: Source of index age. Can be one of 'name',
            'creation_date', or 'field_stats'
        :arg timestring: A strftime string to match the datestamp in an index
            name. Only used for index filtering by ``name``.
        :arg field: A timestamp field name. Only used for ``field_stats``
            based calculations.
        :arg stats_result: Either `min_value` or `max_value`. Only used in
            conjunction with `source`=``field_stats`` to choose whether to
            reference the minimum or maximum result value.
        """
        self.age_keyfield = source
        if source == 'name':
            if not timestring:
                raise exceptions.MissingArgument(
                    'source "name" requires the "timestring" keyword argument'
                )
            self._get_name_based_ages(timestring)
        elif source == 'creation_date':
            # Nothing to do here as this comes from `get_metadata` in __init__
            pass
        elif source == 'field_stats':
            if not field:
                raise exceptions.MissingArgument(
                    'source "field_stats" requires the "field" keyword argument'
                )
            if stats_result not in ['min_value', 'max_value']:
                raise ValueError(
                    'Invalid value for "stats_result": {0}'.format(stats_result)
                )
            self.age_keyfield = stats_result
            self._get_field_stats_dates(field=field)
        else:
            raise ValueError(
                'Invalid source: {0}. '
                'Must be one of "name", '
                '"creation_date", "field_stats".'.format(source)
            )

    def _sort_by_age(self, index_list, reverse=True):
        """
        Take a list of indices and sort them by date.

        By default, the youngest are first with `reverse=True`, but the oldest
        can be first by setting `reverse=False`
        """
        # Do the age-based sorting here.
        # First, build a temporary dictionary with just index and age
        # as the key and value, respectively
        temp = {}
        for index in index_list:
            if self.age_keyfield in self.index_info[index]['age']:
                temp[index] = self.index_info[index]['age'][self.age_keyfield]
            else:
                msg = (
                    '{0} does not have age key "{1}" in IndexList '
                    'metadata'.format(index, self.age_keyfield)
                )
                self.__excludify(True, True, index, msg)
        # Sort alphabetically prior to age sort to keep sorting consistent
        temp_tuple = sorted(temp.items(), key=lambda k: k[0], reverse=reverse)
        # If reverse is True, this will sort so the youngest indices are
        # first. However, if you want oldest first, set reverse to False.
        # Effectively, this should set us up to act on everything older than
        # meets the other set criteria.
        # It starts as a tuple, but then becomes a list.
        sorted_tuple = sorted(temp_tuple, key=lambda k: k[1], reverse=reverse)
        return [x[0] for x in sorted_tuple]

    def filter_by_regex(self, kind=None, value=None, exclude=False):
        """
        Match indices by regular expression (pattern).

        :arg kind: Can be one of: ``suffix``, ``prefix``, ``regex``, or
            ``timestring``. This option defines what kind of filter you will
            be building.
        :arg value: Depends on `kind`. It is the strftime string if `kind` is
            ``timestring``. It's used to build the regular expression for
            other kinds.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by regex')
        if kind not in ['regex', 'prefix', 'suffix', 'timestring']:
            raise ValueError('{0}: Invalid value for kind'.format(kind))

        # Stop here if None or empty value, but zero is okay
        if value == 0:
            pass
        elif not value:
            raise ValueError(
                '{0}: Invalid value for "value". '
                'Cannot be "None" type, empty, or False'.format(value)
            )

        if kind == 'timestring':
            regex = settings.regex_map()[kind].format(utils.get_date_regex(value))
        else:
            regex = settings.regex_map()[kind].format(value)

        self.empty_list_check()
        pattern = re.compile(regex)
        for index in self.working_list():
            self.loggit.debug('Filter by regex: Index: {0}'.format(index))
            match = pattern.search(index)
            if match:
                self.__excludify(True, exclude, index)
            else:
                self.__excludify(False, exclude, index)
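
    # Example (hypothetical usage sketch): keep only indices whose names
    # begin with "logstash-", or invert the match by setting exclude=True:
    #
    #   ilo = IndexList(client)
    #   ilo.filter_by_regex(kind='prefix', value='logstash-')
    #   ilo.filter_by_regex(kind='prefix', value='logstash-', exclude=True)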

    def filter_by_age(self, source='name', direction=None, timestring=None,
                      unit=None, unit_count=None, field=None,
                      stats_result='min_value', epoch=None, exclude=False,
                      unit_count_pattern=False):
        """
        Match `indices` by relative age calculations.

        :arg source: Source of index age. Can be one of 'name',
            'creation_date', or 'field_stats'
        :arg direction: Time to filter, either ``older`` or ``younger``
        :arg timestring: A strftime string to match the datestamp in an index
            name. Only used for index filtering by ``name``.
        :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``,
            ``weeks``, ``months``, or ``years``.
        :arg unit_count: The number of ``unit`` (s). ``unit_count`` * ``unit``
            will be calculated out to the relative number of seconds.
        :arg unit_count_pattern: A regular expression whose capture group
            identifies the value for ``unit_count``.
        :arg field: A timestamp field name. Only used for ``field_stats``
            based calculations.
        :arg stats_result: Either `min_value` or `max_value`. Only used in
            conjunction with `source`=``field_stats`` to choose whether to
            reference the minimum or maximum result value.
        :arg epoch: An epoch timestamp used in conjunction with ``unit`` and
            ``unit_count`` to establish a point of reference for calculations.
            If not provided, the current time will be used.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by age')
        # Get timestamp point of reference, PoR
        PoR = utils.get_point_of_reference(unit, unit_count, epoch)
        if not direction:
            raise exceptions.MissingArgument('Must provide a value for "direction"')
        if direction not in ['older', 'younger']:
            raise ValueError(
                'Invalid value for "direction": {0}'.format(direction)
            )
        self._calculate_ages(
            source=source, timestring=timestring, field=field,
            stats_result=stats_result
        )
        if unit_count_pattern:
            try:
                unit_count_matcher = re.compile(unit_count_pattern)
            except re.error:
                # We got an illegal regex, so won't be able to match anything
                unit_count_matcher = None
        for index in self.working_list():
            try:
                removeThisIndex = False
                age = int(self.index_info[index]['age'][self.age_keyfield])
                msg = (
                    'Index "{0}" age ({1}), direction: "{2}", point of '
                    'reference, ({3})'.format(index, age, direction, PoR)
                )
                # Because time adds to epoch, smaller numbers are actually
                # older timestamps.
                if unit_count_pattern:
                    self.loggit.debug(
                        'unit_count_pattern is set, trying to match pattern '
                        'to index "{0}"'.format(index)
                    )
                    unit_count_from_index = utils.get_unit_count_from_name(
                        index, unit_count_matcher)
                    if unit_count_from_index:
                        self.loggit.debug(
                            'Pattern matched, applying unit_count of '
                            '"{0}"'.format(unit_count_from_index)
                        )
                        adjustedPoR = utils.get_point_of_reference(
                            unit, unit_count_from_index, epoch)
                        self.loggit.debug(
                            'Adjusting point of reference from {0} to {1} '
                            'based on unit_count of {2} from index '
                            'name'.format(
                                PoR, adjustedPoR, unit_count_from_index)
                        )
                    elif unit_count == -1:
                        # Unable to match pattern and unit_count is -1,
                        # meaning no fallback, so this index is removed from
                        # the list
                        self.loggit.debug(
                            'Unable to match pattern and no fallback value '
                            'set. Removing index "{0}" from actionable '
                            'list'.format(index)
                        )
                        removeThisIndex = True
                        # Necessary to avoid an exception if the first index
                        # is excluded
                        adjustedPoR = PoR
                    else:
                        # Unable to match the pattern and unit_count is set,
                        # so fall back to using unit_count for determining
                        # whether to keep this index in the list
                        self.loggit.debug(
                            'Unable to match pattern, using fallback value '
                            'of "{0}"'.format(unit_count)
                        )
                        adjustedPoR = PoR
                else:
                    adjustedPoR = PoR
                if direction == 'older':
                    agetest = age < adjustedPoR
                else:
                    agetest = age > adjustedPoR
                self.__excludify(
                    agetest and not removeThisIndex, exclude, index, msg)
            except KeyError:
                self.loggit.debug(
                    'Index "{0}" does not meet provided criteria. '
                    'Removing from list.'.format(index))
                self.indices.remove(index)
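
    # Example (hypothetical usage sketch): keep only indices whose
    # creation_date is more than 30 days in the past:
    #
    #   ilo.filter_by_age(source='creation_date', direction='older',
    #                     unit='days', unit_count=30)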

    def filter_by_space(
            self, disk_space=None, reverse=True, use_age=False,
            source='creation_date', timestring=None, field=None,
            stats_result='min_value', exclude=False,
            threshold_behavior='greater_than'):
        """
        Remove indices from the actionable list based on space consumed,
        sorted reverse-alphabetically by default. If you set `reverse` to
        `False`, it will be sorted alphabetically.

        The default is usually what you will want. If only one kind of index
        is provided--for example, indices matching ``logstash-%Y.%m.%d``--then
        reverse alphabetical sorting will mean the oldest will remain in the
        list, because lower numbers in the dates mean older indices.

        Given indices ``index1``, ``index2``, and ``index3``, setting
        `reverse` to `False` means ``index3`` will be deleted before
        ``index2``, which will be deleted before ``index1``.

        `use_age` allows ordering indices by age. Age is determined by the
        index creation date by default, but you can specify a `source` of
        ``name``, ``max_value``, or ``min_value``. The ``name`` `source`
        requires the timestring argument.

        `threshold_behavior`, when set to `greater_than` (default), includes
        the index if it tests larger than `disk_space`. When set to
        `less_than`, it includes the index if it is smaller than `disk_space`.

        :arg disk_space: Filter indices over *n* gigabytes
        :arg threshold_behavior: Size to filter, either ``greater_than`` or
            ``less_than``. Defaults to ``greater_than`` to preserve backwards
            compatibility.
        :arg reverse: The filtering direction. (default: `True`). Ignored if
            `use_age` is `True`
        :arg use_age: Sort indices by age. ``source`` is required in this
            case.
        :arg source: Source of index age. Can be one of ``name``,
            ``creation_date``, or ``field_stats``. Default: ``creation_date``
        :arg timestring: A strftime string to match the datestamp in an index
            name. Only used if `source` ``name`` is selected.
        :arg field: A timestamp field name. Only used if `source`
            ``field_stats`` is selected.
        :arg stats_result: Either `min_value` or `max_value`. Only used if
            `source` ``field_stats`` is selected. It determines whether to
            reference the minimum or maximum value of `field` in each index.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by disk space')
        # Ensure that disk_space is a float
        if not disk_space:
            raise exceptions.MissingArgument('No value for "disk_space" provided')
        if threshold_behavior not in ['greater_than', 'less_than']:
            raise ValueError(
                'Invalid value for "threshold_behavior": {0}'.format(
                    threshold_behavior)
            )
        disk_space = float(disk_space)
        disk_usage = 0.0
        disk_limit = disk_space * 2**30
        self.loggit.debug(
            'Cannot get disk usage info from closed indices. '
            'Omitting any closed indices.'
        )
        self.filter_closed()
        # Create a copy-by-value working list
        working_list = self.working_list()
        if use_age:
            self._calculate_ages(
                source=source, timestring=timestring, field=field,
                stats_result=stats_result
            )
            # Using default value of reverse=True in self._sort_by_age()
            sorted_indices = self._sort_by_age(working_list)
        else:
            # Default to sorting by index name
            sorted_indices = sorted(working_list, reverse=reverse)

        for index in sorted_indices:
            disk_usage += self.index_info[index]['size_in_bytes']
            msg = (
                '{0}, summed disk usage is {1} and disk limit is {2}.'.format(
                    index, utils.byte_size(disk_usage),
                    utils.byte_size(disk_limit)
                )
            )
            if threshold_behavior == 'greater_than':
                self.__excludify((disk_usage > disk_limit), exclude, index, msg)
            elif threshold_behavior == 'less_than':
                self.__excludify((disk_usage < disk_limit), exclude, index, msg)

    def filter_kibana(self, exclude=True):
        """
        Match any index named ``.kibana*`` in `indices`. Older releases
        addressed index names that no longer exist.

        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering kibana indices')
        self.empty_list_check()
        # Compile the pattern once, rather than on every loop iteration
        pattern = re.compile(r'^\.kibana.*$')
        for index in self.working_list():
            if pattern.match(index):
                self.__excludify(True, exclude, index)
            else:
                self.__excludify(False, exclude, index)

    def filter_forceMerged(self, max_num_segments=None, exclude=True):
        """
        Match any index which has `max_num_segments` per shard or fewer in the
        actionable list.

        :arg max_num_segments: Cutoff number of segments per shard.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering forceMerged indices')
        if not max_num_segments:
            raise exceptions.MissingArgument('Missing value for "max_num_segments"')
        self.loggit.debug(
            'Cannot get segment count of closed indices. '
            'Omitting any closed indices.'
        )
        self.filter_closed()
        self._get_segment_counts()
        for index in self.working_list():
            # Do this to reduce long lines and make it more readable...
            shards = int(self.index_info[index]['number_of_shards'])
            replicas = int(self.index_info[index]['number_of_replicas'])
            segments = int(self.index_info[index]['segments'])
            msg = (
                '{0} has {1} shard(s) + {2} replica(s) '
                'with a sum total of {3} segments.'.format(
                    index, shards, replicas, segments
                )
            )
            expected_count = ((shards + (shards * replicas)) * max_num_segments)
            self.__excludify((segments <= expected_count), exclude, index, msg)
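
    # The arithmetic above, worked through (illustrative): an index with 3
    # primary shards and 1 replica, checked with max_num_segments=2, has an
    # expected ceiling of (3 + 3 * 1) * 2 == 12 segments. At or below that
    # count, it is considered already forceMerged and is excluded by default.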

    def filter_closed(self, exclude=True):
        """
        Filter out closed indices from `indices`

        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering closed indices')
        self.empty_list_check()
        for index in self.working_list():
            condition = self.index_info[index]['state'] == 'close'
            self.loggit.debug('Index {0} state: {1}'.format(
                index, self.index_info[index]['state']
            ))
            self.__excludify(condition, exclude, index)

    def filter_empty(self, exclude=True):
        """
        Filter indices with a document count of zero

        Indices that are closed are automatically excluded from consideration
        due to closed indices reporting a document count of zero.

        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering empty indices')
        self.filter_closed()
        self.empty_list_check()
        for index in self.working_list():
            condition = self.index_info[index]['docs'] == 0
            self.loggit.debug('Index {0} doc count: {1}'.format(
                index, self.index_info[index]['docs']
            ))
            self.__excludify(condition, exclude, index)

    def filter_opened(self, exclude=True):
        """
        Filter out opened indices from `indices`

        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering open indices')
        self.empty_list_check()
        for index in self.working_list():
            condition = self.index_info[index]['state'] == 'open'
            self.loggit.debug('Index {0} state: {1}'.format(
                index, self.index_info[index]['state']
            ))
            self.__excludify(condition, exclude, index)

    def filter_allocated(self, key=None, value=None,
                         allocation_type='require', exclude=True):
        """
        Match indices that have the routing allocation rule of
        `key=value` from `indices`

        :arg key: The allocation attribute to check for
        :arg value: The value to check for
        :arg allocation_type: Type of allocation to apply
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug(
            'Filtering indices with shard routing allocation rules')
        if not key:
            raise exceptions.MissingArgument('No value for "key" provided')
        if not value:
            raise exceptions.MissingArgument('No value for "value" provided')
        if allocation_type not in ['include', 'exclude', 'require']:
            raise ValueError(
                'Invalid "allocation_type": {0}'.format(allocation_type)
            )
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = self.client.indices.get_settings(index=utils.to_csv(l))
            if working_list:
                for index in list(working_list.keys()):
                    try:
                        has_routing = (
                            working_list[index]['settings']['index']['routing']
                            ['allocation'][allocation_type][key] == value
                        )
                    except KeyError:
                        has_routing = False
                    msg = (
                        '{0}: Routing (mis)match: '
                        'index.routing.allocation.{1}.{2}={3}.'.format(
                            index, allocation_type, key, value
                        )
                    )
                    self.__excludify(has_routing, exclude, index, msg)

    def filter_none(self):
        """No filtering: the full `indices` list is left as-is."""
        self.loggit.debug('"None" filter selected. No filtering will be done.')

    def filter_by_alias(self, aliases=None, exclude=False):
        """
        Match indices which are associated with the alias or list of aliases
        identified by `aliases`.

        An update to Elasticsearch 5.5.0 changes the behavior of this from
        previous 5.x versions:
        https://www.elastic.co/guide/en/elasticsearch/reference/5.5/breaking-changes-5.5.html#breaking_55_rest_changes

        What this means is that indices must appear in all aliases in list
        `aliases` or a 404 error will result, leading to no indices being
        matched. In older versions, if the index was associated with even one
        of the aliases in `aliases`, it would result in a match.

        It is unknown if this behavior affects anyone. At the time this was
        written, no users had been bitten by this. The code could be adapted
        to manually loop if the previous behavior is desired. But if no users
        complain, this will become the accepted/expected behavior.

        :arg aliases: A list of alias names.
        :type aliases: list
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug(
            'Filtering indices matching aliases: "{0}"'.format(aliases))
        if not aliases:
            raise exceptions.MissingArgument('No value for "aliases" provided')
        aliases = utils.ensure_list(aliases)
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            try:
                # get_alias will either return a dict or raise a NotFoundError
                has_alias = list(self.client.indices.get_alias(
                    index=utils.to_csv(l),
                    name=utils.to_csv(aliases)
                ).keys())
                self.loggit.debug('has_alias: {0}'.format(has_alias))
            except NotFoundError:
                # On a NotFoundError, no indices in this chunk matched the
                # alias(es), so has_alias must be empty
                has_alias = []
            for index in l:
                if index in has_alias:
                    isOrNot = 'is'
                    condition = True
                else:
                    isOrNot = 'is not'
                    condition = False
                msg = (
                    '{0} {1} associated with aliases: {2}'.format(
                        index, isOrNot, aliases
                    )
                )
                self.__excludify(condition, exclude, index, msg)
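
    # Example (hypothetical usage sketch): act only on indices carrying a
    # "live-data" alias -- note the 5.5+ behavior described above when more
    # than one alias is given:
    #
    #   ilo.filter_by_alias(aliases=['live-data'])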

    def filter_by_count(
            self, count=None, reverse=True, use_age=False, pattern=None,
            source='creation_date', timestring=None, field=None,
            stats_result='min_value', exclude=True):
        # pylint: disable=W1401
        """
        Remove indices from the actionable list beyond the number `count`,
        sorted reverse-alphabetically by default. If you set `reverse` to
        `False`, it will be sorted alphabetically.

        The default is usually what you will want. If only one kind of index
        is provided--for example, indices matching ``logstash-%Y.%m.%d``--then
        reverse alphabetical sorting will mean the oldest will remain in the
        list, because lower numbers in the dates mean older indices.

        Given indices ``index1``, ``index2``, and ``index3``, setting
        `reverse` to `False` means ``index3`` will be deleted before
        ``index2``, which will be deleted before ``index1``.

        `use_age` allows ordering indices by age. Age is determined by the
        index creation date by default, but you can specify a `source` of
        ``name``, ``max_value``, or ``min_value``. The ``name`` `source`
        requires the timestring argument.

        :arg count: Filter indices beyond `count`.
        :arg reverse: The filtering direction. (default: `True`).
        :arg use_age: Sort indices by age. ``source`` is required in this
            case.
        :arg pattern: Select indices to count from a regular expression
            pattern. This pattern must have one and only one capture group.
            This can allow a single ``count`` filter instance to operate
            against any number of matching patterns, and keep ``count`` of
            each index in that group. For example, given a ``pattern`` of
            ``'^(.*)-\d{6}$'``, it will match both ``rollover-000001`` and
            ``index-999990``, but not ``logstash-2017.10.12``. Following the
            same example, if my cluster also had ``rollover-000002`` through
            ``rollover-000010`` and ``index-888888`` through ``index-999999``,
            it will process both groups of indices, and include or exclude the
            ``count`` of each.
        :arg source: Source of index age. Can be one of ``name``,
            ``creation_date``, or ``field_stats``. Default: ``creation_date``
        :arg timestring: A strftime string to match the datestamp in an index
            name. Only used if `source` ``name`` is selected.
        :arg field: A timestamp field name. Only used if `source`
            ``field_stats`` is selected.
        :arg stats_result: Either `min_value` or `max_value`. Only used if
            `source` ``field_stats`` is selected. It determines whether to
            reference the minimum or maximum value of `field` in each index.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering indices by count')
        if not count:
            raise exceptions.MissingArgument('No value for "count" provided')

        # Create a copy-by-value working list
        working_list = self.working_list()
        if pattern:
            try:
                r = re.compile(pattern)
                if r.groups < 1:
                    raise exceptions.ConfigurationError(
                        'No regular expression group found in {0}'.format(pattern))
                elif r.groups > 1:
                    raise exceptions.ConfigurationError(
                        'More than 1 regular expression group found in '
                        '{0}'.format(pattern))
                # Prune indices not matching the regular expression from the
                # object (and from filtered_indices).
                # We do not want to act on them by accident.
                prune_these = list(
                    filter(lambda x: r.match(x) is None, working_list))
                filtered_indices = working_list
                for index in prune_these:
                    msg = (
                        '{0} does not match regular expression {1}.'.format(
                            index, pattern
                        )
                    )
                    # Always remove non-matching indices, regardless of the
                    # caller's `exclude` setting
                    self.__excludify(True, True, index, msg)
                    # Also remove it from filtered_indices
                    filtered_indices.remove(index)
                # Presort these filtered_indices using the lambda
                presorted = sorted(
                    filtered_indices, key=lambda x: r.match(x).group(1))
            except Exception as e:
                raise exceptions.ActionError(
                    'Unable to process pattern: "{0}". Error: {1}'.format(pattern, e))
            # Initialize groups here
            groups = []
            # We have to pull keys this way, but we don't need to keep them.
            # We only need g for groups
            for _, g in itertools.groupby(
                    presorted, key=lambda x: r.match(x).group(1)):
                groups.append(list(g))
        else:
            # Since pattern will create a list of lists, and we iterate over
            # that, we need to put our single list inside a list
            groups = [working_list]
        for group in groups:
            if use_age:
                if source != 'name':
                    self.loggit.warning(
                        'Cannot get age information from closed indices '
                        'unless source="name". Omitting any closed indices.'
                    )
                    self.filter_closed()
                self._calculate_ages(
                    source=source, timestring=timestring, field=field,
                    stats_result=stats_result
                )
                # Using default value of reverse=True in self._sort_by_age()
                sorted_indices = self._sort_by_age(group, reverse=reverse)
            else:
                # Default to sorting by index name
                sorted_indices = sorted(group, reverse=reverse)

            idx = 1
            for index in sorted_indices:
                msg = (
                    '{0} is {1} of specified count of {2}.'.format(
                        index, idx, count
                    )
                )
                condition = idx <= count
                self.__excludify(condition, exclude, index, msg)
                idx += 1
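
    # Example (hypothetical usage sketch): within each rollover family
    # matched by the capture group, remove everything beyond the newest 5
    # (pattern taken from the docstring example above):
    #
    #   ilo.filter_by_count(count=5, pattern='^(.*)-\d{6}$', reverse=True)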

    def filter_by_shards(self, number_of_shards=None,
                         shard_filter_behavior='greater_than', exclude=False):
        """
        Match `indices` with a given shard count.

        Selects all indices with a shard count ``greater_than``
        `number_of_shards` by default. Use `shard_filter_behavior` to select
        indices with a shard count ``greater_than``, ``greater_than_or_equal``,
        ``less_than``, ``less_than_or_equal``, or ``equal`` to
        `number_of_shards`.

        :arg number_of_shards: shard threshold
        :arg shard_filter_behavior: Do you want to filter on ``greater_than``,
            ``greater_than_or_equal``, ``less_than``, ``less_than_or_equal``,
            or ``equal``?
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug("Filtering indices by number of shards")
        if not number_of_shards:
            raise exceptions.MissingArgument(
                'No value for "number_of_shards" provided')
        if shard_filter_behavior not in [
                'greater_than', 'less_than', 'greater_than_or_equal',
                'less_than_or_equal', 'equal']:
            raise ValueError(
                'Invalid value for "shard_filter_behavior": {0}'.format(
                    shard_filter_behavior)
            )
        if number_of_shards < 1 or (
                shard_filter_behavior == 'less_than' and number_of_shards == 1):
            raise ValueError(
                'Unacceptable value: {0} -- "number_of_shards" cannot be less '
                'than 1. A valid index will have at least one '
                'shard.'.format(number_of_shards)
            )

        self.empty_list_check()
        for index in self.working_list():
            self.loggit.debug(
                'Filter by number of shards: Index: {0}'.format(index))
            if shard_filter_behavior == 'greater_than':
                condition = int(
                    self.index_info[index]['number_of_shards']) > number_of_shards
            elif shard_filter_behavior == 'less_than':
                condition = int(
                    self.index_info[index]['number_of_shards']) < number_of_shards
            elif shard_filter_behavior == 'greater_than_or_equal':
                condition = int(
                    self.index_info[index]['number_of_shards']) >= number_of_shards
            elif shard_filter_behavior == 'less_than_or_equal':
                condition = int(
                    self.index_info[index]['number_of_shards']) <= number_of_shards
            else:
                condition = int(
                    self.index_info[index]['number_of_shards']) == number_of_shards
            self.__excludify(condition, exclude, index)
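
    # Example (hypothetical usage sketch): match indices with more than 5
    # primary shards:
    #
    #   ilo.filter_by_shards(number_of_shards=5,
    #                        shard_filter_behavior='greater_than')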

    def filter_period(
            self, period_type='relative', source='name', range_from=None,
            range_to=None, date_from=None, date_to=None,
            date_from_format=None, date_to_format=None, timestring=None,
            unit=None, field=None, stats_result='min_value', intersect=False,
            week_starts_on='sunday', epoch=None, exclude=False):
        """
        Match `indices` with ages within a given period.

        :arg period_type: Can be either ``absolute`` or ``relative``. Default
            is ``relative``. ``date_from`` and ``date_to`` are required when
            using ``period_type='absolute'``. ``range_from`` and ``range_to``
            are required with ``period_type='relative'``.
        :arg source: Source of index age. Can be one of 'name',
            'creation_date', or 'field_stats'
        :arg range_from: How many ``unit`` (s) in the past/future is the
            origin?
        :arg range_to: How many ``unit`` (s) in the past/future is the end
            point?
        :arg date_from: The simplified date for the start of the range
        :arg date_to: The simplified date for the end of the range. If this
            value is the same as ``date_from``, the full value of ``unit``
            will be extrapolated for the range. For example, if ``unit`` is
            ``months``, and ``date_from`` and ``date_to`` are both
            ``2017.01``, then the entire month of January 2017 will be the
            absolute date range.
        :arg date_from_format: The strftime string used to parse ``date_from``
        :arg date_to_format: The strftime string used to parse ``date_to``
        :arg timestring: A strftime string to match the datestamp in an index
            name. Only used for index filtering by ``name``.
        :arg unit: One of ``hours``, ``days``, ``weeks``, ``months``, or
            ``years``.
        :arg field: A timestamp field name. Only used for ``field_stats``
            based calculations.
        :arg stats_result: Either `min_value` or `max_value`. Only used in
            conjunction with ``source='field_stats'`` to choose whether to
            reference the minimum or maximum result value.
        :arg intersect: Only used when ``source='field_stats'``.
            If `True`, only indices where both `min_value` and `max_value` are
            within the period will be selected. If `False`, it will use
            whichever you specified. Default is `False` to preserve expected
            behavior.
        :arg week_starts_on: Either ``sunday`` or ``monday``. Default is
            ``sunday``
        :arg epoch: An epoch timestamp used to establish a point of reference
            for calculations. If not provided, the current time will be used.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by period')
        if period_type not in ['absolute', 'relative']:
            raise ValueError(
                'Unacceptable value: {0} -- "period_type" must be either '
                '"absolute" or "relative".'.format(period_type)
            )
        if period_type == 'relative':
            func = utils.date_range
            args = [unit, range_from, range_to, epoch]
            kwgs = {'week_starts_on': week_starts_on}
            if not isinstance(range_from, int) or not isinstance(range_to, int):
                raise exceptions.ConfigurationError(
                    '"range_from" and "range_to" must be integer values')
        else:
            func = utils.absolute_date_range
            args = [unit, date_from, date_to]
            kwgs = {
                'date_from_format': date_from_format,
                'date_to_format': date_to_format
            }
            for reqd in [date_from, date_to, date_from_format, date_to_format]:
                if not reqd:
                    raise exceptions.ConfigurationError(
                        'Must provide "date_from", "date_to", '
                        '"date_from_format", and "date_to_format" with '
                        'absolute period_type'
                    )
        try:
            start, end = func(*args, **kwgs)
        except Exception as e:
            utils.report_failure(e)

        self._calculate_ages(
            source=source, timestring=timestring, field=field,
            stats_result=stats_result
        )
        for index in self.working_list():
            try:
                if source == 'field_stats' and intersect:
                    min_age = int(self.index_info[index]['age']['min_value'])
                    max_age = int(self.index_info[index]['age']['max_value'])
                    msg = (
                        'Index "{0}", timestamp field "{1}", min_value ({2}), '
                        'max_value ({3}), period start: "{4}", period '
                        'end, "{5}"'.format(
                            index, field, min_age, max_age, start, end
                        )
                    )
                    # Because time adds to epoch, smaller numbers are actually
                    # older timestamps.
                    inrange = ((min_age >= start) and (max_age <= end))
                else:
                    age = int(self.index_info[index]['age'][self.age_keyfield])
                    msg = (
                        'Index "{0}" age ({1}), period start: "{2}", period '
                        'end, "{3}"'.format(index, age, start, end)
                    )
                    # Because time adds to epoch, smaller numbers are actually
                    # older timestamps.
                    inrange = ((age >= start) and (age <= end))
                self.__excludify(inrange, exclude, index, msg)
            except KeyError:
                self.loggit.debug(
                    'Index "{0}" does not meet provided criteria. '
                    'Removing from list.'.format(index))
                self.indices.remove(index)
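
    # Example (hypothetical usage sketch): match indices whose name-derived
    # date falls in last week (relative), or in January 2017 (absolute, per
    # the docstring example above):
    #
    #   ilo.filter_period(period_type='relative', source='name',
    #                     unit='weeks', range_from=-1, range_to=-1,
    #                     timestring='%Y.%m.%d')
    #   ilo.filter_period(period_type='absolute', source='name',
    #                     unit='months', date_from='2017.01',
    #                     date_to='2017.01', date_from_format='%Y.%m',
    #                     date_to_format='%Y.%m', timestring='%Y.%m.%d')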

    def filter_ilm(self, exclude=True):
        """
        Match indices that have the setting `index.lifecycle.name`

        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `True`
        """
        self.loggit.debug('Filtering indices with index.lifecycle.name')
        index_lists = utils.chunk_index_list(self.indices)
        if index_lists == [['']]:
            self.loggit.debug('Empty working list. No ILM indices to filter.')
            return
        for l in index_lists:
            working_list = self.client.indices.get_settings(index=utils.to_csv(l))
            if working_list:
                for index in list(working_list.keys()):
                    try:
                        subvalue = working_list[index]['settings']['index']['lifecycle']
                        has_ilm = 'name' in subvalue
                        msg = '{0} has index.lifecycle.name {1}'.format(
                            index, subvalue['name'])
                    except KeyError:
                        has_ilm = False
                        msg = 'index.lifecycle.name is not set for index {0}'.format(index)
                    self.__excludify(has_ilm, exclude, index, msg)

    def iterate_filters(self, filter_dict):
        """
        Iterate over the filters defined in `filter_dict` and execute them.

        :arg filter_dict: The configuration dictionary

        .. note:: `filter_dict` should be a dictionary with the following
            form:

        .. code-block:: python

            { 'filters' : [
                    {
                        'filtertype': 'the_filter_type',
                        'key1' : 'value1',
                        ...
                        'keyN' : 'valueN'
                    }
                ]
            }

        """
        self.loggit.debug('Iterating over a list of filters')
        # Make sure we actually _have_ filters to act on
        if 'filters' not in filter_dict or len(filter_dict['filters']) < 1:
            self.loggit.info('No filters in config. Returning unaltered object.')
            return

        self.loggit.debug('All filters: {0}'.format(filter_dict['filters']))
        for f in filter_dict['filters']:
            self.loggit.debug('Top of the loop: {0}'.format(self.indices))
            self.loggit.debug('Un-parsed filter args: {0}'.format(f))
            # Make sure we got at least this much in the configuration
            self.loggit.debug('Parsed filter args: {0}'.format(
                SchemaCheck(
                    f,
                    filters.structure(),
                    'filter',
                    'IndexList.iterate_filters'
                ).result()
            ))
            method = self.__map_method(f['filtertype'])
            del f['filtertype']
            # If it's a filtertype with arguments, update the defaults with
            # the provided settings.
            if f:
                self.loggit.debug('Filter args: {0}'.format(f))
                self.loggit.debug('Pre-instance: {0}'.format(self.indices))
                method(**f)
                self.loggit.debug('Post-instance: {0}'.format(self.indices))
            else:
                # Otherwise, it's a settingless filter.
                method()
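
    # Example (hypothetical usage sketch): the same 30-day age filter as the
    # earlier filter_by_age example, expressed as a configuration dictionary:
    #
    #   ilo.iterate_filters({
    #       'filters': [
    #           {'filtertype': 'age', 'source': 'creation_date',
    #            'direction': 'older', 'unit': 'days', 'unit_count': 30}
    #       ]
    #   })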

    def filter_by_size(
            self, size_threshold=None, threshold_behavior='greater_than',
            exclude=False, size_behavior='primary'):
        """
        Remove indices from the actionable list based on index size.

        `threshold_behavior`, when set to `greater_than` (default), includes
        the index if it tests larger than `size_threshold`. When set to
        `less_than`, it includes the index if it is smaller than
        `size_threshold`.

        :arg size_threshold: Filter indices over *n* gigabytes
        :arg threshold_behavior: Size to filter, either ``greater_than`` or
            ``less_than``. Defaults to ``greater_than`` to preserve backwards
            compatibility.
        :arg size_behavior: Which size to use for filtering, either
            ``primary`` or ``total``. Defaults to ``primary``
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only
            matching indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by index size')
        # Ensure that size_threshold is a float
        if not size_threshold:
            raise exceptions.MissingArgument('No value for "size_threshold" provided')
        if size_behavior not in ['primary', 'total']:
            raise ValueError(
                'Invalid value for "size_behavior": {0}'.format(size_behavior))
        if threshold_behavior not in ['greater_than', 'less_than']:
            raise ValueError(
                'Invalid value for "threshold_behavior": {0}'.format(
                    threshold_behavior))
        index_size_limit = float(size_threshold) * 2**30
        self.loggit.debug(
            'Cannot get disk usage info from closed indices. '
            'Omitting any closed indices.'
        )
        self.filter_closed()
        # Create a copy-by-value working list
        working_list = self.working_list()
        for index in working_list:
            if size_behavior == 'primary':
                index_size = self.index_info[index]['primary_size_in_bytes']
            else:
                index_size = self.index_info[index]['size_in_bytes']
            msg = (
                '{0}, index size is {1} and size limit is {2}.'.format(
                    index, utils.byte_size(index_size),
                    utils.byte_size(index_size_limit)
                )
            )
            if threshold_behavior == 'greater_than':
                self.__excludify((index_size > index_size_limit), exclude, index, msg)
            elif threshold_behavior == 'less_than':
                self.__excludify((index_size < index_size_limit), exclude, index, msg)
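
    # Example (hypothetical usage sketch): match indices whose primary store
    # is larger than 40 GiB:
    #
    #   ilo.filter_by_size(size_threshold=40, size_behavior='primary',
    #                      threshold_behavior='greater_than')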