Coverage for curator/indexlist.py: 94%

534 statements  

« prev     ^ index     » next       coverage.py v7.3.0, created at 2023-08-16 15:36 -0600

1from datetime import timedelta, datetime, date 

2import time 

3import re 

4import itertools 

5import logging 

6from elasticsearch7.exceptions import NotFoundError, TransportError 

7from curator import exceptions, utils 

8from curator.defaults import settings 

9from curator.validators import SchemaCheck, filters 

10 

class IndexList(object):
    """
    Hold the list of indices in an Elasticsearch cluster along with
    per-index metadata, and provide filter methods that prune the list
    down to the indices an Action class should operate on.
    """

    def __init__(self, client):
        """
        :arg client: An Elasticsearch client object (validated by
            ``utils.verify_client_object``).
        """
        utils.verify_client_object(client)
        self.loggit = logging.getLogger('curator.indexlist')
        #: An Elasticsearch Client object
        #: Also accessible as an instance variable.
        self.client = client
        #: Instance variable.
        #: Information extracted from indices, such as segment count, age, etc.
        #: Populated at instance creation time, and by other private helper
        #: methods, as needed. **Type:** ``dict()``
        self.index_info = {}
        #: Instance variable.
        #: The running list of indices which will be used by an Action class.
        #: Populated at instance creation time. **Type:** ``list()``
        self.indices = []
        #: Instance variable.
        #: All indices in the cluster at instance creation time.
        #: **Type:** ``list()``
        self.all_indices = []
        # Fetch indices and populate index_info immediately so the object
        # is fully usable as soon as it is constructed.
        self.__get_indices()

32 

33 def __actionable(self, idx): 

34 self.loggit.debug( 

35 'Index {0} is actionable and remains in the list.'.format(idx)) 

36 

37 def __not_actionable(self, idx): 

38 self.loggit.debug( 

39 'Index {0} is not actionable, removing from list.'.format(idx)) 

40 self.indices.remove(idx) 

41 

42 def __excludify(self, condition, exclude, index, msg=None): 

43 if condition == True: 

44 if exclude: 

45 text = "Removed from actionable list" 

46 self.__not_actionable(index) 

47 else: 

48 text = "Remains in actionable list" 

49 self.__actionable(index) 

50 else: 

51 if exclude: 

52 text = "Remains in actionable list" 

53 self.__actionable(index) 

54 else: 

55 text = "Removed from actionable list" 

56 self.__not_actionable(index) 

57 if msg: 

58 self.loggit.debug('{0}: {1}'.format(text, msg)) 

59 

    def __get_indices(self):
        """
        Pull all indices into `all_indices`, then populate `indices` and
        `index_info`
        """
        self.loggit.debug('Getting all indices')
        self.all_indices = utils.get_indices(self.client)
        # Copy by value so that later filtering of self.indices cannot
        # alter the full-cluster snapshot kept in self.all_indices.
        self.indices = self.all_indices[:]
        if self.indices:
            for index in self.indices:
                # Seed a default metadata structure for every index.
                self.__build_index_info(index)
            self._get_metadata()
            self._get_index_stats()

73 

74 def __build_index_info(self, index): 

75 """ 

76 Ensure that `index` is a key in `index_info`. If not, create a 

77 sub-dictionary structure under that key. 

78 """ 

79 self.loggit.debug( 

80 'Building preliminary index metadata for {0}'.format(index)) 

81 if not index in self.index_info: 

82 self.index_info[index] = { 

83 "age" : {}, 

84 "number_of_replicas" : 0, 

85 "number_of_shards" : 0, 

86 "segments" : 0, 

87 "size_in_bytes" : 0, 

88 "docs" : 0, 

89 "state" : "", 

90 } 

91 

92 def __map_method(self, ft): 

93 methods = { 

94 'alias': self.filter_by_alias, 

95 'age': self.filter_by_age, 

96 'allocated': self.filter_allocated, 

97 'closed': self.filter_closed, 

98 'count': self.filter_by_count, 

99 'empty': self.filter_empty, 

100 'forcemerged': self.filter_forceMerged, 

101 'ilm': self.filter_ilm, 

102 'kibana': self.filter_kibana, 

103 'none': self.filter_none, 

104 'opened': self.filter_opened, 

105 'period': self.filter_period, 

106 'pattern': self.filter_by_regex, 

107 'space': self.filter_by_space, 

108 'shards': self.filter_by_shards, 

109 'size': self.filter_by_size, 

110 } 

111 return methods[ft] 

112 

    def _get_index_stats(self):
        """
        Populate `index_info` with index `size_in_bytes`, `primary_size_in_bytes`
        and doc count information for each index.
        """
        self.loggit.debug('Getting index stats')
        self.empty_list_check()
        # Subroutine to do the dirty work: copy total/primary store size and
        # doc count from a stats API response into index_info.
        def iterate_over_stats(stats):
            for index in stats['indices']:
                size = stats['indices'][index]['total']['store']['size_in_bytes']
                docs = stats['indices'][index]['total']['docs']['count']
                primary_size = stats['indices'][index]['primaries']['store']['size_in_bytes']

                self.loggit.debug(
                    'Index: {0} Size: {1} Docs: {2} PrimarySize: {3}'.format(
                        index, utils.byte_size(size), docs, utils.byte_size(primary_size)
                    )
                )
                self.index_info[index]['size_in_bytes'] = size
                self.index_info[index]['docs'] = docs
                self.index_info[index]['primary_size_in_bytes'] = primary_size

        # Closed indices cannot report stats, so omit them from the query.
        working_list = self.working_list()
        for index in self.working_list():
            if self.index_info[index]['state'] == 'close':
                working_list.remove(index)
        if working_list:
            # Chunk the list to keep request URLs within size limits.
            index_lists = utils.chunk_index_list(working_list)
            for l in index_lists:
                stats_result = {}

                try:
                    stats_result.update(self._get_indices_stats(l))
                except TransportError as err:
                    if err.status_code == 413:
                        # 413 = Request Entity Too Large: retry the same
                        # chunk as several smaller requests.
                        self.loggit.debug('Huge Payload 413 Error - Trying to get information with multiple requests')
                        stats_result = {}
                        stats_result.update(self._bulk_queries(l, self._get_indices_stats))

                iterate_over_stats(stats_result)

154 

155 def _get_indices_stats(self, data): 

156 return self.client.indices.stats(index=utils.to_csv(data), metric='store,docs') 

157 

158 def _bulk_queries(self, data, exec_func): 

159 slice_number = 10 

160 query_result = {} 

161 loop_number = round(len(data)/slice_number) if round(len(data)/slice_number) > 0 else 1 

162 self.loggit.debug("Bulk Queries - number requests created: {0}".format(loop_number)) 

163 

164 for num in range(0, loop_number): 

165 if num == (loop_number-1): 

166 data_sliced = data[num*slice_number:] 

167 else: 

168 data_sliced = data[num*slice_number:(num+1)*slice_number] 

169 query_result.update(exec_func(data_sliced)) 

170 

171 return query_result 

172 

173 def _get_cluster_state(self, data): 

174 return self.client.cluster.state(index=utils.to_csv(data), metric='metadata')['metadata']['indices'] 

175 

    def _get_metadata(self):
        """
        Populate `index_info` for each index with metadata from the cluster
        state: creation date, shard and replica counts, open/closed state,
        and any routing allocation settings.

        Indices lacking a ``creation_date`` are removed from the actionable
        list as a safety measure.
        """
        self.loggit.debug('Getting index metadata')
        self.empty_list_check()
        # Chunk the list to keep request URLs within size limits.
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = {}
            try:
                working_list.update(self._get_cluster_state(l))
            except TransportError as err:
                if err.status_code == 413:
                    # 413 = Request Entity Too Large: retry as several
                    # smaller requests.
                    self.loggit.debug('Huge Payload 413 Error - Trying to get information with multiple requests')
                    working_list = {}
                    working_list.update(self._bulk_queries(l, self._get_cluster_state))

            if working_list:
                for index in list(working_list.keys()):
                    s = self.index_info[index]
                    wl = working_list[index]

                    if 'settings' not in wl:
                        # Used by AWS ES <= 5.1
                        # We can try to get the same info from index/_settings.
                        # workaround for https://github.com/elastic/curator/issues/880
                        # NOTE(review): `feature` is passed as a keyword here;
                        # confirm it is still supported by the es7 client.
                        alt_wl = self.client.indices.get(index, feature='_settings')[index]
                        wl['settings'] = alt_wl['settings']

                    if 'creation_date' not in wl['settings']['index']:
                        self.loggit.warning(
                            'Index: {0} has no "creation_date"! This implies '
                            'that the index predates Elasticsearch v1.4. For '
                            'safety, this index will be removed from the '
                            'actionable list.'.format(index)
                        )
                        self.__not_actionable(index)
                    else:
                        # fix_epoch normalizes the stored timestamp to a
                        # consistent epoch form (see curator.utils).
                        s['age']['creation_date'] = (
                            utils.fix_epoch(wl['settings']['index']['creation_date'])
                        )
                    s['number_of_replicas'] = (
                        wl['settings']['index']['number_of_replicas']
                    )
                    s['number_of_shards'] = (
                        wl['settings']['index']['number_of_shards']
                    )
                    s['state'] = wl['state']
                    if 'routing' in wl['settings']['index']:
                        s['routing'] = wl['settings']['index']['routing']

227 

228 def empty_list_check(self): 

229 """Raise exception if `indices` is empty""" 

230 self.loggit.debug('Checking for empty list') 

231 if not self.indices: 

232 raise exceptions.NoIndices('index_list object is empty.') 

233 

234 def working_list(self): 

235 """ 

236 Return the current value of `indices` as copy-by-value to prevent list 

237 stomping during iterations 

238 """ 

239 # Copy by value, rather than reference to prevent list stomping during 

240 # iterations 

241 self.loggit.debug('Generating working list of indices') 

242 return self.indices[:] 

243 

244 def _get_indices_segments(self, data): 

245 return self.client.indices.segments(index=utils.to_csv(data))['indices'].copy() 

246 

    def _get_segment_counts(self):
        """
        Populate `index_info` with segment information for each index.
        """
        self.loggit.debug('Getting index segment counts')
        self.empty_list_check()
        # Chunk the list to keep request URLs within size limits.
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = {}
            try:
                working_list.update(self._get_indices_segments(l))
            except TransportError as err:
                if err.status_code == 413:
                    # 413 = Request Entity Too Large: retry the same chunk
                    # as several smaller requests.
                    self.loggit.debug('Huge Payload 413 Error - Trying to get information with multiple requests')
                    working_list = {}
                    working_list.update(self._bulk_queries(l, self._get_indices_segments))

            if working_list:
                for index in list(working_list.keys()):
                    # Sum num_search_segments across every shard copy
                    # (primaries and replicas) of the index.
                    shards = working_list[index]['shards']
                    segmentcount = 0
                    for shardnum in shards:
                        for shard in range(0,len(shards[shardnum])):
                            segmentcount += (
                                shards[shardnum][shard]['num_search_segments']
                            )
                    self.index_info[index]['segments'] = segmentcount

274 

    def _get_name_based_ages(self, timestring):
        """
        Add indices to `index_info` based on the age as indicated by the index
        name pattern, if it matches `timestring`

        :arg timestring: An strftime pattern
        """
        # Check for empty list before proceeding here to prevent non-iterable
        # condition
        self.loggit.debug('Getting ages of indices by "name"')
        self.empty_list_check()
        ts = utils.TimestringSearch(timestring)
        for index in self.working_list():
            epoch = ts.get_epoch(index)
            # Only a successful match yields an int epoch; non-matching
            # index names simply get no 'name' age key.
            if isinstance(epoch, int):
                self.index_info[index]['age']['name'] = epoch

291 

    def _get_field_stats_dates(self, field='@timestamp'):
        """
        Add indices to `index_info` based on the values the queries return,
        as determined by the min and max aggregated values of `field`

        :arg field: The field with the date value. The field must be mapped in
            elasticsearch as a date datatype. Default: ``@timestamp``

        :raises exceptions.ActionError: if `field` is absent from an index.
        """
        self.loggit.debug(
            'Cannot query closed indices. Omitting any closed indices.'
        )
        self.filter_closed()
        self.loggit.debug(
            'Cannot use field_stats with empty indices. Omitting any empty indices.'
        )
        self.filter_empty()
        self.loggit.debug(
            'Getting index date by querying indices for min & max value of '
            '{0} field'.format(field)
        )
        self.empty_list_check()
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            for index in l:
                # size=0: only aggregation results are needed, not hits.
                body = {
                    'aggs' : {
                        'min' : { 'min' : { 'field' : field } },
                        'max' : { 'max' : { 'field' : field } }
                    }
                }
                response = self.client.search(index=index, size=0, body=body)
                self.loggit.debug('RESPONSE: {0}'.format(response))
                if response:
                    try:
                        r = response['aggregations']
                        self.loggit.debug('r: {0}'.format(r))
                        s = self.index_info[index]['age']
                        # fix_epoch normalizes the aggregated timestamps to a
                        # consistent epoch form (see curator.utils).
                        s['min_value'] = utils.fix_epoch(r['min']['value'])
                        s['max_value'] = utils.fix_epoch(r['max']['value'])
                        self.loggit.debug('s: {0}'.format(s))
                    except KeyError:
                        # Missing aggregation values imply the field does not
                        # exist in this index.
                        raise exceptions.ActionError(
                            'Field "{0}" not found in index '
                            '"{1}"'.format(field, index)
                        )

337 

    def _calculate_ages(self, source=None, timestring=None, field=None,
        stats_result=None
    ):
        """
        This method initiates index age calculation based on the given
        parameters. Exceptions are raised when they are improperly configured.

        Set instance variable `age_keyfield` for use later, if needed.

        :arg source: Source of index age. Can be one of 'name', 'creation_date',
            or 'field_stats'
        :arg timestring: An strftime string to match the datestamp in an index
            name. Only used for index filtering by ``name``.
        :arg field: A timestamp field name. Only used for ``field_stats`` based
            calculations.
        :arg stats_result: Either `min_value` or `max_value`. Only used in
            conjunction with `source`=``field_stats`` to choose whether to
            reference the minimum or maximum result value.

        :raises exceptions.MissingArgument: when a required companion keyword
            argument for `source` is missing.
        :raises ValueError: on invalid `source` or `stats_result` values.
        """
        self.age_keyfield = source
        if source == 'name':
            if not timestring:
                raise exceptions.MissingArgument(
                    'source "name" requires the "timestring" keyword argument'
                )
            self._get_name_based_ages(timestring)
        elif source == 'creation_date':
            # Nothing to do here as this comes from `get_metadata` in __init__
            pass
        elif source == 'field_stats':
            if not field:
                raise exceptions.MissingArgument(
                    'source "field_stats" requires the "field" keyword argument'
                )
            if stats_result not in ['min_value', 'max_value']:
                raise ValueError(
                    'Invalid value for "stats_result": {0}'.format(stats_result)
                )
            # For field_stats, ages are keyed by the chosen stats_result
            # rather than by the source name.
            self.age_keyfield = stats_result
            self._get_field_stats_dates(field=field)
        else:
            raise ValueError(
                'Invalid source: {0}. '
                'Must be one of "name", '
                '"creation_date", "field_stats".'.format(source)
            )

384 

385 def _sort_by_age(self, index_list, reverse=True): 

386 """ 

387 Take a list of indices and sort them by date. 

388 

389 By default, the youngest are first with `reverse=True`, but the oldest 

390 can be first by setting `reverse=False` 

391 """ 

392 # Do the age-based sorting here. 

393 # First, build an temporary dictionary with just index and age 

394 # as the key and value, respectively 

395 temp = {} 

396 for index in index_list: 

397 if self.age_keyfield in self.index_info[index]['age']: 

398 temp[index] = self.index_info[index]['age'][self.age_keyfield] 

399 else: 

400 msg = ( 

401 '{0} does not have age key "{1}" in IndexList ' 

402 ' metadata'.format(index, self.age_keyfield) 

403 ) 

404 self.__excludify(True, True, index, msg) 

405 # Sort alphabetically prior to age sort to keep sorting consistent 

406 temp_tuple = ( 

407 sorted(temp.items(), key=lambda k: k[0], reverse=reverse) 

408 ) 

409 # If reverse is True, this will sort so the youngest indices are first. 

410 # However, if you want oldest first, set reverse to False. 

411 # Effectively, this should set us up to act on everything older than 

412 # meets the other set criteria. 

413 # It starts as a tuple, but then becomes a list. 

414 sorted_tuple = ( 

415 sorted(temp_tuple, key=lambda k: k[1], reverse=reverse) 

416 ) 

417 return [x[0] for x in sorted_tuple] 

418 

419 def filter_by_regex(self, kind=None, value=None, exclude=False): 

420 """ 

421 Match indices by regular expression (pattern). 

422 

423 :arg kind: Can be one of: ``suffix``, ``prefix``, ``regex``, or 

424 ``timestring``. This option defines what kind of filter you will be 

425 building. 

426 :arg value: Depends on `kind`. It is the strftime string if `kind` is 

427 ``timestring``. It's used to build the regular expression for other 

428 kinds. 

429 :arg exclude: If `exclude` is `True`, this filter will remove matching 

430 indices from `indices`. If `exclude` is `False`, then only matching 

431 indices will be kept in `indices`. 

432 Default is `False` 

433 """ 

434 self.loggit.debug('Filtering indices by regex') 

435 if kind not in [ 'regex', 'prefix', 'suffix', 'timestring' ]: 

436 raise ValueError('{0}: Invalid value for kind'.format(kind)) 

437 

438 # Stop here if None or empty value, but zero is okay 

439 if value == 0: 

440 pass 

441 elif not value: 

442 raise ValueError( 

443 '{0}: Invalid value for "value". ' 

444 'Cannot be "None" type, empty, or False' 

445 ) 

446 

447 if kind == 'timestring': 

448 regex = settings.regex_map()[kind].format(utils.get_date_regex(value)) 

449 else: 

450 regex = settings.regex_map()[kind].format(value) 

451 

452 self.empty_list_check() 

453 pattern = re.compile(regex) 

454 for index in self.working_list(): 

455 self.loggit.debug('Filter by regex: Index: {0}'.format(index)) 

456 match = pattern.search(index) 

457 if match: 

458 self.__excludify(True, exclude, index) 

459 else: 

460 self.__excludify(False, exclude, index) 

461 

462 def filter_by_age(self, source='name', direction=None, timestring=None, 

463 unit=None, unit_count=None, field=None, stats_result='min_value', 

464 epoch=None, exclude=False, unit_count_pattern=False 

465 ): 

466 """ 

467 Match `indices` by relative age calculations. 

468 

469 :arg source: Source of index age. Can be one of 'name', 'creation_date', 

470 or 'field_stats' 

471 :arg direction: Time to filter, either ``older`` or ``younger`` 

472 :arg timestring: An strftime string to match the datestamp in an index 

473 name. Only used for index filtering by ``name``. 

474 :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``, 

475 ``weeks``, ``months``, or ``years``. 

476 :arg unit_count: The number of ``unit`` (s). ``unit_count`` * ``unit`` will 

477 be calculated out to the relative number of seconds. 

478 :arg unit_count_pattern: A regular expression whose capture group identifies 

479 the value for ``unit_count``. 

480 :arg field: A timestamp field name. Only used for ``field_stats`` based 

481 calculations. 

482 :arg stats_result: Either `min_value` or `max_value`. Only used in 

483 conjunction with `source`=``field_stats`` to choose whether to 

484 reference the minimum or maximum result value. 

485 :arg epoch: An epoch timestamp used in conjunction with ``unit`` and 

486 ``unit_count`` to establish a point of reference for calculations. 

487 If not provided, the current time will be used. 

488 :arg exclude: If `exclude` is `True`, this filter will remove matching 

489 indices from `indices`. If `exclude` is `False`, then only matching 

490 indices will be kept in `indices`. 

491 Default is `False` 

492 """ 

493 

494 self.loggit.debug('Filtering indices by age') 

495 # Get timestamp point of reference, PoR 

496 PoR = utils.get_point_of_reference(unit, unit_count, epoch) 

497 if not direction: 

498 raise exceptions.MissingArgument('Must provide a value for "direction"') 

499 if direction not in ['older', 'younger']: 

500 raise ValueError( 

501 'Invalid value for "direction": {0}'.format(direction) 

502 ) 

503 self._calculate_ages( 

504 source=source, timestring=timestring, field=field, 

505 stats_result=stats_result 

506 ) 

507 if unit_count_pattern: 

508 try: 

509 unit_count_matcher = re.compile(unit_count_pattern) 

510 except: 

511 # We got an illegal regex, so won't be able to match anything 

512 unit_count_matcher = None 

513 for index in self.working_list(): 

514 try: 

515 removeThisIndex = False 

516 age = int(self.index_info[index]['age'][self.age_keyfield]) 

517 msg = ( 

518 'Index "{0}" age ({1}), direction: "{2}", point of ' 

519 'reference, ({3})'.format( 

520 index, 

521 age, 

522 direction, 

523 PoR 

524 ) 

525 ) 

526 # Because time adds to epoch, smaller numbers are actually older 

527 # timestamps. 

528 if unit_count_pattern: 

529 self.loggit.debug('Unit_count_pattern is set, trying to match pattern to index "{0}"'.format(index)) 

530 unit_count_from_index = utils.get_unit_count_from_name(index, unit_count_matcher) 

531 if unit_count_from_index: 

532 self.loggit.debug('Pattern matched, applying unit_count of "{0}"'.format(unit_count_from_index)) 

533 adjustedPoR = utils.get_point_of_reference(unit, unit_count_from_index, epoch) 

534 self.loggit.debug('Adjusting point of reference from {0} to {1} based on unit_count of {2} from index name'.format(PoR, adjustedPoR, unit_count_from_index)) 

535 elif unit_count == -1: 

536 # Unable to match pattern and unit_count is -1, meaning no fallback, so this 

537 # index is removed from the list 

538 self.loggit.debug('Unable to match pattern and no fallback value set. Removing index "{0}" from actionable list'.format(index)) 

539 removeThisIndex = True 

540 adjustedPoR = PoR # necessary to avoid exception if the first index is excluded 

541 else: 

542 # Unable to match the pattern and unit_count is set, so fall back to using unit_count 

543 # for determining whether to keep this index in the list 

544 self.loggit.debug('Unable to match pattern using fallback value of "{0}"'.format(unit_count)) 

545 adjustedPoR = PoR 

546 else: 

547 adjustedPoR = PoR 

548 if direction == 'older': 

549 agetest = age < adjustedPoR 

550 else: 

551 agetest = age > adjustedPoR 

552 self.__excludify(agetest and not removeThisIndex, exclude, index, msg) 

553 except KeyError: 

554 self.loggit.debug( 

555 'Index "{0}" does not meet provided criteria. ' 

556 'Removing from list.'.format(index)) 

557 self.indices.remove(index) 

558 

    def filter_by_space(
        self, disk_space=None, reverse=True, use_age=False,
        source='creation_date', timestring=None, field=None,
        stats_result='min_value', exclude=False, threshold_behavior='greater_than'):
        """
        Remove indices from the actionable list based on space
        consumed, sorted reverse-alphabetically by default. If you set
        `reverse` to `False`, it will be sorted alphabetically.

        The default is usually what you will want. If only one kind of index is
        provided--for example, indices matching ``logstash-%Y.%m.%d``--then
        reverse alphabetical sorting will mean the oldest will remain in the
        list, because lower numbers in the dates mean older indices.

        By setting `reverse` to `False`, then ``index3`` will be deleted before
        ``index2``, which will be deleted before ``index1``

        `use_age` allows ordering indices by age. Age is determined by the index
        creation date by default, but you can specify an `source` of ``name``,
        ``max_value``, or ``min_value``. The ``name`` `source` requires the
        timestring argument.

        `threshold_behavior`, when set to `greater_than` (default), includes the
        index if it tests to be larger than `disk_space`. When set to
        `less_than`, it includes the index if it is smaller than `disk_space`

        :arg disk_space: Filter indices over *n* gigabytes
        :arg threshold_behavior: Size to filter, either ``greater_than`` or ``less_than``. Defaults
            to ``greater_than`` to preserve backwards compatibility.
        :arg reverse: The filtering direction. (default: `True`). Ignored if
            `use_age` is `True`
        :arg use_age: Sort indices by age. ``source`` is required in this
            case.
        :arg source: Source of index age. Can be one of ``name``,
            ``creation_date``, or ``field_stats``. Default: ``creation_date``
        :arg timestring: An strftime string to match the datestamp in an index
            name. Only used if `source` ``name`` is selected.
        :arg field: A timestamp field name. Only used if `source`
            ``field_stats`` is selected.
        :arg stats_result: Either `min_value` or `max_value`. Only used if
            `source` ``field_stats`` is selected. It determines whether to
            reference the minimum or maximum value of `field` in each index.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only matching
            indices will be kept in `indices`.
            Default is `False`
        """
        self.loggit.debug('Filtering indices by disk space')
        # Ensure that disk_space is a float
        if not disk_space:
            raise exceptions.MissingArgument('No value for "disk_space" provided')

        if threshold_behavior not in ['greater_than', 'less_than']:
            raise ValueError(
                'Invalid value for "threshold_behavior": {0}'.format(
                    threshold_behavior)
            )

        disk_space = float(disk_space)

        disk_usage = 0.0
        # Convert gigabytes to bytes (2**30 bytes per GB).
        disk_limit = disk_space * 2**30

        self.loggit.debug(
            'Cannot get disk usage info from closed indices. '
            'Omitting any closed indices.'
        )
        self.filter_closed()

        # Create a copy-by-value working list
        working_list = self.working_list()

        if use_age:
            self._calculate_ages(
                source=source, timestring=timestring, field=field,
                stats_result=stats_result
            )
            # Using default value of reverse=True in self._sort_by_age()
            sorted_indices = self._sort_by_age(working_list)

        else:
            # Default to sorting by index name
            sorted_indices = sorted(working_list, reverse=reverse)

        # Accumulate usage in sorted order; an index matches once the running
        # total crosses (or stays under, for less_than) the limit.
        for index in sorted_indices:

            disk_usage += self.index_info[index]['size_in_bytes']
            msg = (
                '{0}, summed disk usage is {1} and disk limit is {2}.'.format(
                    index, utils.byte_size(disk_usage), utils.byte_size(disk_limit)
                )
            )
            if threshold_behavior == 'greater_than':
                self.__excludify((disk_usage > disk_limit),
                                 exclude, index, msg)
            elif threshold_behavior == 'less_than':
                self.__excludify((disk_usage < disk_limit),
                                 exclude, index, msg)

657 

658 def filter_kibana(self, exclude=True): 

659 """ 

660 Match any index named ``.kibana*`` 

661 in `indices`. Older releases addressed index names that no longer exist. 

662 

663 :arg exclude: If `exclude` is `True`, this filter will remove matching 

664 indices from `indices`. If `exclude` is `False`, then only matching 

665 indices will be kept in `indices`. 

666 Default is `True` 

667 """ 

668 self.loggit.debug('Filtering kibana indices') 

669 self.empty_list_check() 

670 for index in self.working_list(): 

671 pattern = re.compile(r'^\.kibana.*$') 

672 if pattern.match(index): 

673 self.__excludify(True, exclude, index) 

674 else: 

675 self.__excludify(False, exclude, index) 

676 

    def filter_forceMerged(self, max_num_segments=None, exclude=True):
        """
        Match any index which has `max_num_segments` per shard or fewer in the
        actionable list.

        :arg max_num_segments: Cutoff number of segments per shard.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only matching
            indices will be kept in `indices`.
            Default is `True`

        :raises exceptions.MissingArgument: when `max_num_segments` is unset.
        """
        self.loggit.debug('Filtering forceMerged indices')
        if not max_num_segments:
            raise exceptions.MissingArgument('Missing value for "max_num_segments"')
        self.loggit.debug(
            'Cannot get segment count of closed indices. '
            'Omitting any closed indices.'
        )
        self.filter_closed()
        self._get_segment_counts()
        for index in self.working_list():
            # Do this to reduce long lines and make it more readable...
            shards = int(self.index_info[index]['number_of_shards'])
            replicas = int(self.index_info[index]['number_of_replicas'])
            segments = int(self.index_info[index]['segments'])
            msg = (
                '{0} has {1} shard(s) + {2} replica(s) '
                'with a sum total of {3} segments.'.format(
                    index, shards, replicas, segments
                )
            )
            # An index is "force-merged" when its total segment count is at
            # most max_num_segments per shard copy (primaries + replicas).
            expected_count = ((shards + (shards * replicas)) * max_num_segments)
            self.__excludify((segments <= expected_count), exclude, index, msg)

710 

711 

712 def filter_closed(self, exclude=True): 

713 """ 

714 Filter out closed indices from `indices` 

715 

716 :arg exclude: If `exclude` is `True`, this filter will remove matching 

717 indices from `indices`. If `exclude` is `False`, then only matching 

718 indices will be kept in `indices`. 

719 Default is `True` 

720 """ 

721 self.loggit.debug('Filtering closed indices') 

722 self.empty_list_check() 

723 for index in self.working_list(): 

724 condition = self.index_info[index]['state'] == 'close' 

725 self.loggit.debug('Index {0} state: {1}'.format( 

726 index, self.index_info[index]['state'] 

727 ) 

728 ) 

729 self.__excludify(condition, exclude, index) 

730 

731 def filter_empty(self, exclude=True): 

732 """ 

733 Filter indices with a document count of zero 

734 

735 Indices that are closed are automatically excluded from consideration 

736 due to closed indices reporting a document count of zero. 

737 

738 :arg exclude: If `exclude` is `True`, this filter will remove matching 

739 indices from `indices`. If `exclude` is `False`, then only matching 

740 indices will be kept in `indices`. 

741 Default is `True` 

742 """ 

743 self.loggit.debug('Filtering empty indices') 

744 self.filter_closed() 

745 self.empty_list_check() 

746 for index in self.working_list(): 

747 condition = self.index_info[index]['docs'] == 0 

748 self.loggit.debug('Index {0} doc count: {1}'.format( 

749 index, self.index_info[index]['docs'] 

750 ) 

751 ) 

752 self.__excludify(condition, exclude, index) 

753 

754 def filter_opened(self, exclude=True): 

755 """ 

756 Filter out opened indices from `indices` 

757 

758 :arg exclude: If `exclude` is `True`, this filter will remove matching 

759 indices from `indices`. If `exclude` is `False`, then only matching 

760 indices will be kept in `indices`. 

761 Default is `True` 

762 """ 

763 self.loggit.debug('Filtering open indices') 

764 self.empty_list_check() 

765 for index in self.working_list(): 

766 condition = self.index_info[index]['state'] == 'open' 

767 self.loggit.debug('Index {0} state: {1}'.format( 

768 index, self.index_info[index]['state'] 

769 ) 

770 ) 

771 self.__excludify(condition, exclude, index) 

772 

    def filter_allocated(self,
        key=None, value=None, allocation_type='require', exclude=True,
    ):
        """
        Match indices that have the routing allocation rule of
        `key=value` from `indices`

        :arg key: The allocation attribute to check for
        :arg value: The value to check for
        :arg allocation_type: Type of allocation to apply
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only matching
            indices will be kept in `indices`.
            Default is `True`

        :raises exceptions.MissingArgument: when `key` or `value` is unset.
        :raises ValueError: on an invalid `allocation_type`.
        """
        self.loggit.debug(
            'Filtering indices with shard routing allocation rules')
        if not key:
            raise exceptions.MissingArgument('No value for "key" provided')
        if not value:
            raise exceptions.MissingArgument('No value for "value" provided')
        if not allocation_type in ['include', 'exclude', 'require']:
            raise ValueError(
                'Invalid "allocation_type": {0}'.format(allocation_type)
            )
        self.empty_list_check()
        # Chunk the list to keep request URLs within size limits.
        index_lists = utils.chunk_index_list(self.indices)
        for l in index_lists:
            working_list = self.client.indices.get_settings(index=utils.to_csv(l))
            if working_list:
                for index in list(working_list.keys()):
                    try:
                        has_routing = (
                            working_list[index]['settings']['index']['routing']['allocation'][allocation_type][key] == value
                        )
                    except KeyError:
                        # Any missing level in the settings path means the
                        # routing rule is simply not set for this index.
                        has_routing = False
                    msg = (
                        '{0}: Routing (mis)match: '
                        'index.routing.allocation.{1}.{2}={3}.'.format(
                            index, allocation_type, key, value
                        )
                    )
                    self.__excludify(has_routing, exclude, index, msg)

819 

820 def filter_none(self): 

821 self.loggit.debug('"None" filter selected. No filtering will be done.') 

822 

823 def filter_by_alias(self, aliases=None, exclude=False): 

824 """ 

825 Match indices which are associated with the alias or list of aliases 

826 identified by `aliases`. 

827 

828 An update to Elasticsearch 5.5.0 changes the behavior of this from 

829 previous 5.x versions: 

830 https://www.elastic.co/guide/en/elasticsearch/reference/5.5/breaking-changes-5.5.html#breaking_55_rest_changes 

831 

832 What this means is that indices must appear in all aliases in list 

833 `aliases` or a 404 error will result, leading to no indices being 

834 matched. In older versions, if the index was associated with even one 

835 of the aliases in `aliases`, it would result in a match. 

836 

837 It is unknown if this behavior affects anyone. At the time this was 

838 written, no users have been bit by this. The code could be adapted 

839 to manually loop if the previous behavior is desired. But if no users 

840 complain, this will become the accepted/expected behavior. 

841 

842 :arg aliases: A list of alias names. 

843 :type aliases: list 

844 :arg exclude: If `exclude` is `True`, this filter will remove matching 

845 indices from `indices`. If `exclude` is `False`, then only matching 

846 indices will be kept in `indices`. 

847 Default is `False` 

848 """ 

849 self.loggit.debug( 

850 'Filtering indices matching aliases: "{0}"'.format(aliases)) 

851 if not aliases: 

852 raise exceptions.MissingArgument('No value for "aliases" provided') 

853 aliases = utils.ensure_list(aliases) 

854 self.empty_list_check() 

855 index_lists = utils.chunk_index_list(self.indices) 

856 for l in index_lists: 

857 try: 

858 # get_alias will either return {} or a NotFoundError. 

859 has_alias = list(self.client.indices.get_alias( 

860 index=utils.to_csv(l), 

861 name=utils.to_csv(aliases) 

862 ).keys()) 

863 self.loggit.debug('has_alias: {0}'.format(has_alias)) 

864 except NotFoundError: 

865 # if we see the NotFoundError, we need to set working_list to {} 

866 has_alias = [] 

867 for index in l: 

868 if index in has_alias: 

869 isOrNot = 'is' 

870 condition = True 

871 else: 

872 isOrNot = 'is not' 

873 condition = False 

874 msg = ( 

875 '{0} {1} associated with aliases: {2}'.format( 

876 index, isOrNot, aliases 

877 ) 

878 ) 

879 self.__excludify(condition, exclude, index, msg) 

880 

881 def filter_by_count( 

882 self, count=None, reverse=True, use_age=False, pattern=None, 

883 source='creation_date', timestring=None, field=None, 

884 stats_result='min_value', exclude=True): 

885 # pylint: disable=W1401 

886 """ 

887 Remove indices from the actionable list beyond the number `count`, 

888 sorted reverse-alphabetically by default. If you set `reverse` to 

889 `False`, it will be sorted alphabetically. 

890 

891 The default is usually what you will want. If only one kind of index is 

892 provided--for example, indices matching ``logstash-%Y.%m.%d``--then 

893 reverse alphabetical sorting will mean the oldest will remain in the 

894 list, because lower numbers in the dates mean older indices. 

895 

896 By setting `reverse` to `False`, then ``index3`` will be deleted before 

897 ``index2``, which will be deleted before ``index1`` 

898 

899 `use_age` allows ordering indices by age. Age is determined by the index 

900 creation date by default, but you can specify an `source` of ``name``, 

901 ``max_value``, or ``min_value``. The ``name`` `source` requires the 

902 timestring argument. 

903 

904 :arg count: Filter indices beyond `count`. 

905 :arg reverse: The filtering direction. (default: `True`). 

906 :arg use_age: Sort indices by age. ``source`` is required in this 

907 case. 

908 :arg pattern: Select indices to count from a regular expression 

909 pattern. This pattern must have one and only one capture group. 

910 This can allow a single ``count`` filter instance to operate against 

911 any number of matching patterns, and keep ``count`` of each index 

912 in that group. For example, given a ``pattern`` of ``'^(.*)-\d{6}$'``, 

913 it will match both ``rollover-000001`` and ``index-999990``, but not 

914 ``logstash-2017.10.12``. Following the same example, if my cluster 

915 also had ``rollover-000002`` through ``rollover-000010`` and 

916 ``index-888888`` through ``index-999999``, it will process both 

917 groups of indices, and include or exclude the ``count`` of each. 

918 :arg source: Source of index age. Can be one of ``name``, 

919 ``creation_date``, or ``field_stats``. Default: ``creation_date`` 

920 :arg timestring: An strftime string to match the datestamp in an index 

921 name. Only used if `source` ``name`` is selected. 

922 :arg field: A timestamp field name. Only used if `source` 

923 ``field_stats`` is selected. 

924 :arg stats_result: Either `min_value` or `max_value`. Only used if 

925 `source` ``field_stats`` is selected. It determines whether to 

926 reference the minimum or maximum value of `field` in each index. 

927 :arg exclude: If `exclude` is `True`, this filter will remove matching 

928 indices from `indices`. If `exclude` is `False`, then only matching 

929 indices will be kept in `indices`. 

930 Default is `True` 

931 """ 

932 self.loggit.debug('Filtering indices by count') 

933 if not count: 

934 raise exceptions.MissingArgument('No value for "count" provided') 

935 

936 # Create a copy-by-value working list 

937 working_list = self.working_list() 

938 if pattern: 

939 try: 

940 r = re.compile(pattern) 

941 if r.groups < 1: 

942 raise exceptions.ConfigurationError('No regular expression group found in {0}'.format(pattern)) 

943 elif r.groups > 1: 

944 raise exceptions.ConfigurationError('More than 1 regular expression group found in {0}'.format(pattern)) 

945 # Prune indices not matching the regular expression the object (and filtered_indices) 

946 # We do not want to act on them by accident. 

947 prune_these = list(filter(lambda x: r.match(x) is None, working_list)) 

948 filtered_indices = working_list 

949 for index in prune_these: 

950 msg = ( 

951 '{0} does not match regular expression {1}.'.format( 

952 index, pattern 

953 ) 

954 ) 

955 condition = True 

956 exclude = True 

957 self.__excludify(condition, exclude, index, msg) 

958 # also remove it from filtered_indices 

959 filtered_indices.remove(index) 

960 # Presort these filtered_indices using the lambda 

961 presorted = sorted(filtered_indices, key=lambda x: r.match(x).group(1)) 

962 except Exception as e: 

963 raise exceptions.ActionError('Unable to process pattern: "{0}". Error: {1}'.format(pattern, e)) 

964 # Initialize groups here 

965 groups = [] 

966 # We have to pull keys k this way, but we don't need to keep them 

967 # We only need g for groups 

968 for _, g in itertools.groupby(presorted, key=lambda x: r.match(x).group(1)): 

969 groups.append(list(g)) 

970 else: 

971 # Since pattern will create a list of lists, and we iterate over that, 

972 # we need to put our single list inside a list 

973 groups = [ working_list ] 

974 for group in groups: 

975 if use_age: 

976 if source != 'name': 

977 self.loggit.warning( 

978 'Cannot get age information from closed indices unless ' 

979 'source="name". Omitting any closed indices.' 

980 ) 

981 self.filter_closed() 

982 self._calculate_ages( 

983 source=source, timestring=timestring, field=field, 

984 stats_result=stats_result 

985 ) 

986 # Using default value of reverse=True in self._sort_by_age() 

987 sorted_indices = self._sort_by_age(group, reverse=reverse) 

988 

989 else: 

990 # Default to sorting by index name 

991 sorted_indices = sorted(group, reverse=reverse) 

992 

993 

994 idx = 1 

995 for index in sorted_indices: 

996 msg = ( 

997 '{0} is {1} of specified count of {2}.'.format( 

998 index, idx, count 

999 ) 

1000 ) 

1001 condition = True if idx <= count else False 

1002 self.__excludify(condition, exclude, index, msg) 

1003 idx += 1 

1004 

1005 def filter_by_shards(self, number_of_shards=None, shard_filter_behavior='greater_than', exclude=False): 

1006 """ 

1007 Match `indices` with a given shard count. 

1008 

1009 Selects all indices with a shard count 'greater_than' number_of_shards by default. 

1010 Use shard_filter_behavior to select indices with shard count 'greater_than', 'greater_than_or_equal', 

1011 'less_than', 'less_than_or_equal', or 'equal' to number_of_shards. 

1012 

1013 :arg number_of_shards: shard threshold 

1014 :arg shard_filter_behavior: Do you want to filter on greater_than, greater_than_or_equal, less_than, 

1015 less_than_or_equal, or equal? 

1016 :arg exclude: If `exclude` is `True`, this filter will remove matching 

1017 indices from `indices`. If `exclude` is `False`, then only matching 

1018 indices will be kept in `indices`. 

1019 Default is `False` 

1020 """ 

1021 self.loggit.debug("Filtering indices by number of shards") 

1022 if not number_of_shards: 

1023 raise exceptions.MissingArgument('No value for "number_of_shards" provided') 

1024 

1025 if shard_filter_behavior not in ['greater_than', 'less_than', 'greater_than_or_equal', 'less_than_or_equal', 'equal']: 

1026 raise ValueError( 

1027 'Invalid value for "shard_filter_behavior": {0}'.format( 

1028 shard_filter_behavior) 

1029 ) 

1030 

1031 if number_of_shards < 1 or (shard_filter_behavior == 'less_than' and number_of_shards == 1): 

1032 raise ValueError( 

1033 'Unacceptable value: {0} -- "number_of_shards" cannot be less than 1. A valid index ' 

1034 'will have at least one shard.'.format(number_of_shards) 

1035 ) 

1036 

1037 self.empty_list_check() 

1038 for index in self.working_list(): 

1039 self.loggit.debug('Filter by number of shards: Index: {0}'.format(index)) 

1040 

1041 if shard_filter_behavior == 'greater_than': 

1042 condition = int(self.index_info[index]['number_of_shards']) > number_of_shards 

1043 elif shard_filter_behavior == 'less_than': 

1044 condition = int(self.index_info[index]['number_of_shards']) < number_of_shards 

1045 elif shard_filter_behavior == 'greater_than_or_equal': 

1046 condition = int(self.index_info[index]['number_of_shards']) >= number_of_shards 

1047 elif shard_filter_behavior == 'less_than_or_equal': 

1048 condition = int(self.index_info[index]['number_of_shards']) <= number_of_shards 

1049 else: 

1050 condition = int(self.index_info[index]['number_of_shards']) == number_of_shards 

1051 

1052 self.__excludify(condition, exclude, index) 

1053 

    def filter_period(
        self, period_type='relative', source='name', range_from=None, range_to=None,
        date_from=None, date_to=None, date_from_format=None, date_to_format=None,
        timestring=None, unit=None, field=None, stats_result='min_value',
        intersect=False, week_starts_on='sunday', epoch=None, exclude=False,
    ):
        """
        Match `indices` with ages within a given period.

        :arg period_type: Can be either ``absolute`` or ``relative``. Default is
            ``relative``. ``date_from`` and ``date_to`` are required when using
            ``period_type='absolute'``. ``range_from`` and ``range_to`` are
            required with ``period_type='relative'``.
        :arg source: Source of index age. Can be one of 'name', 'creation_date',
            or 'field_stats'
        :arg range_from: How many ``unit`` (s) in the past/future is the origin?
        :arg range_to: How many ``unit`` (s) in the past/future is the end point?
        :arg date_from: The simplified date for the start of the range
        :arg date_to: The simplified date for the end of the range. If this value
            is the same as ``date_from``, the full value of ``unit`` will be
            extrapolated for the range. For example, if ``unit`` is ``months``,
            and ``date_from`` and ``date_to`` are both ``2017.01``, then the entire
            month of January 2017 will be the absolute date range.
        :arg date_from_format: The strftime string used to parse ``date_from``
        :arg date_to_format: The strftime string used to parse ``date_to``
        :arg timestring: An strftime string to match the datestamp in an index
            name. Only used for index filtering by ``name``.
        :arg unit: One of ``hours``, ``days``, ``weeks``, ``months``, or
            ``years``.
        :arg field: A timestamp field name. Only used for ``field_stats`` based
            calculations.
        :arg stats_result: Either `min_value` or `max_value`. Only used in
            conjunction with ``source='field_stats'`` to choose whether to
            reference the minimum or maximum result value.
        :arg intersect: Only used when ``source='field_stats'``.
            If `True`, only indices where both `min_value` and `max_value` are
            within the period will be selected. If `False`, it will use whichever
            you specified. Default is `False` to preserve expected behavior.
        :arg week_starts_on: Either ``sunday`` or ``monday``. Default is
            ``sunday``
        :arg epoch: An epoch timestamp used to establish a point of reference
            for calculations. If not provided, the current time will be used.
        :arg exclude: If `exclude` is `True`, this filter will remove matching
            indices from `indices`. If `exclude` is `False`, then only matching
            indices will be kept in `indices`.
            Default is `False`
        """

        self.loggit.debug('Filtering indices by period')
        if period_type not in ['absolute', 'relative']:
            raise ValueError(
                'Unacceptable value: {0} -- "period_type" must be either "absolute" or '
                '"relative".'.format(period_type)
            )
        # Select the period-calculation helper and its arguments up front so a
        # single call site computes (start, end) for both period types.
        if period_type == 'relative':
            func = utils.date_range
            args = [unit, range_from, range_to, epoch]
            kwgs = { 'week_starts_on': week_starts_on }
            if type(range_from) != type(int()) or type(range_to) != type(int()):
                raise exceptions.ConfigurationError(
                    '"range_from" and "range_to" must be integer values')
        else:
            func = utils.absolute_date_range
            args = [unit, date_from, date_to]
            kwgs = { 'date_from_format': date_from_format, 'date_to_format': date_to_format }
            # All four absolute-period settings are mandatory.
            for reqd in [date_from, date_to, date_from_format, date_to_format]:
                if not reqd:
                    raise exceptions.ConfigurationError(
                        'Must provide "date_from", "date_to", "date_from_format", and '
                        '"date_to_format" with absolute period_type'
                    )
        try:
            # start/end are epoch boundaries of the period.
            start, end = func(*args, **kwgs)
        except Exception as e:
            # NOTE(review): this assumes utils.report_failure always raises;
            # if it ever returned normally, start/end would be unbound below.
            utils.report_failure(e)

        # Populate per-index age info before comparing against the period.
        self._calculate_ages(
            source=source, timestring=timestring, field=field,
            stats_result=stats_result
        )
        for index in self.working_list():
            try:
                if source == 'field_stats' and intersect:
                    # Intersect mode: the entire [min_value, max_value] span of
                    # the timestamp field must fall within the period.
                    min_age = int(self.index_info[index]['age']['min_value'])
                    max_age = int(self.index_info[index]['age']['max_value'])
                    msg = (
                        'Index "{0}", timestamp field "{1}", min_value ({2}), '
                        'max_value ({3}), period start: "{4}", period '
                        'end, "{5}"'.format(
                            index,
                            field,
                            min_age,
                            max_age,
                            start,
                            end
                        )
                    )
                    # Because time adds to epoch, smaller numbers are actually older
                    # timestamps.
                    inrange = ((min_age >= start) and (max_age <= end))
                else:
                    # self.age_keyfield is presumably set by _calculate_ages
                    # (defined elsewhere in this file) -- TODO confirm.
                    age = int(self.index_info[index]['age'][self.age_keyfield])
                    msg = (
                        'Index "{0}" age ({1}), period start: "{2}", period '
                        'end, "{3}"'.format(
                            index,
                            age,
                            start,
                            end
                        )
                    )
                    # Because time adds to epoch, smaller numbers are actually older
                    # timestamps.
                    inrange = ((age >= start) and (age <= end))
                self.__excludify(inrange, exclude, index, msg)
            except KeyError:
                # Missing age info means the index cannot be evaluated; drop it.
                self.loggit.debug(
                    'Index "{0}" does not meet provided criteria. '
                    'Removing from list.'.format(index))
                self.indices.remove(index)

1174 

1175 def filter_ilm(self, exclude=True): 

1176 """ 

1177 Match indices that have the setting `index.lifecycle.name` 

1178 

1179 :arg exclude: If `exclude` is `True`, this filter will remove matching 

1180 indices from `indices`. If `exclude` is `False`, then only matching 

1181 indices will be kept in `indices`. 

1182 Default is `True` 

1183 """ 

1184 self.loggit.debug('Filtering indices with index.lifecycle.name') 

1185 index_lists = utils.chunk_index_list(self.indices) 

1186 if index_lists == [['']]: 

1187 self.loggit.debug('Empty working list. No ILM indices to filter.') 

1188 return 

1189 for l in index_lists: 

1190 working_list = self.client.indices.get_settings(index=utils.to_csv(l)) 

1191 if working_list: 

1192 for index in list(working_list.keys()): 

1193 try: 

1194 subvalue = working_list[index]['settings']['index']['lifecycle'] 

1195 has_ilm = 'name' in subvalue 

1196 msg = '{0} has index.lifecycle.name {1}'.format(index, subvalue['name']) 

1197 except KeyError: 

1198 has_ilm = False 

1199 msg = 'index.lifecycle.name is not set for index {0}'.format(index) 

1200 self.__excludify(has_ilm, exclude, index, msg) 

1201 

1202 def iterate_filters(self, filter_dict): 

1203 """ 

1204 Iterate over the filters defined in `config` and execute them. 

1205 

1206 :arg filter_dict: The configuration dictionary 

1207 

1208 .. note:: `filter_dict` should be a dictionary with the following form: 

1209 .. code-block:: python 

1210 

1211 { 'filters' : [ 

1212 { 

1213 'filtertype': 'the_filter_type', 

1214 'key1' : 'value1', 

1215 ... 

1216 'keyN' : 'valueN' 

1217 } 

1218 ] 

1219 } 

1220 

1221 """ 

1222 self.loggit.debug('Iterating over a list of filters') 

1223 # Make sure we actually _have_ filters to act on 

1224 if not 'filters' in filter_dict or len(filter_dict['filters']) < 1: 

1225 self.loggit.info('No filters in config. Returning unaltered object.') 

1226 return 

1227 

1228 self.loggit.debug('All filters: {0}'.format(filter_dict['filters'])) 

1229 for f in filter_dict['filters']: 

1230 self.loggit.debug('Top of the loop: {0}'.format(self.indices)) 

1231 self.loggit.debug('Un-parsed filter args: {0}'.format(f)) 

1232 # Make sure we got at least this much in the configuration 

1233 self.loggit.debug('Parsed filter args: {0}'.format( 

1234 SchemaCheck( 

1235 f, 

1236 filters.structure(), 

1237 'filter', 

1238 'IndexList.iterate_filters' 

1239 ).result() 

1240 ) 

1241 ) 

1242 method = self.__map_method(f['filtertype']) 

1243 del f['filtertype'] 

1244 # If it's a filtertype with arguments, update the defaults with the 

1245 # provided settings. 

1246 if f: 

1247 self.loggit.debug('Filter args: {0}'.format(f)) 

1248 self.loggit.debug('Pre-instance: {0}'.format(self.indices)) 

1249 method(**f) 

1250 self.loggit.debug('Post-instance: {0}'.format(self.indices)) 

1251 else: 

1252 # Otherwise, it's a settingless filter. 

1253 method() 

1254 

1255 def filter_by_size( 

1256 self, size_threshold=None, threshold_behavior='greater_than', exclude=False, size_behavior='primary'): 

1257 """ 

1258 Remove indices from the actionable list based on index size. 

1259 

1260 `threshold_behavior`, when set to `greater_than` (default), includes if it the index 

1261 tests to be larger than `size_threshold`. When set to `less_than`, it includes if 

1262 the index is smaller than `size_threshold` 

1263 

1264 :arg size_threshold: Filter indices over *n* gigabytes 

1265 :arg threshold_behavior: Size to filter, either ``greater_than`` or ``less_than``. Defaults 

1266 to ``greater_than`` to preserve backwards compatability. 

1267 :arg size_behavior: Size that used to filter, either ``primary`` or ``total``. Defaults to ``primary`` 

1268 :arg exclude: If `exclude` is `True`, this filter will remove matching 

1269 indices from `indices`. If `exclude` is `False`, then only matching 

1270 indices will be kept in `indices`. 

1271 Default is `False` 

1272 """ 

1273 self.loggit.debug('Filtering indices by index size') 

1274 # Ensure that disk_space is a float 

1275 if not size_threshold: 

1276 raise exceptions.MissingArgument('No value for "size_threshold" provided') 

1277 

1278 if size_behavior not in ['primary', 'total']: 

1279 raise ValueError( 'Invalid value for "size_behavior": {0}'.format(size_behavior)) 

1280 

1281 if threshold_behavior not in ['greater_than', 'less_than']: 

1282 raise ValueError( 'Invalid value for "threshold_behavior": {0}'.format(threshold_behavior)) 

1283 

1284 index_size_limit = float(size_threshold) * 2**30 

1285 

1286 self.loggit.debug( 

1287 'Cannot get disk usage info from closed indices. ' 

1288 'Omitting any closed indices.' 

1289 ) 

1290 self.filter_closed() 

1291 

1292 # Create a copy-by-value working list 

1293 working_list = self.working_list() 

1294 

1295 for index in working_list: 

1296 

1297 if size_behavior == 'primary': 

1298 index_size = self.index_info[index]['primary_size_in_bytes'] 

1299 else: 

1300 index_size = self.index_info[index]['size_in_bytes'] 

1301 

1302 msg = ( 

1303 '{0}, index size is {1} and size limit is {2}.'.format( 

1304 index, utils.byte_size(index_size), utils.byte_size(index_size_limit) 

1305 ) 

1306 ) 

1307 if threshold_behavior == 'greater_than': 

1308 self.__excludify((index_size > index_size_limit), exclude, index, msg) 

1309 elif threshold_behavior == 'less_than': 

1310 self.__excludify((index_size < index_size_limit), exclude, index, msg)