Coverage for curator/utils.py: 98%

928 statements  

coverage.py v7.3.0, created at 2023-08-16 15:36 -0600

1"""Utilities""" 

2import time 

3import logging 

4import os 

5import random 

6import re 

7import string 

8import sys 

9from datetime import timedelta, datetime, date 

10import base64 

11import yaml 

12import elasticsearch7 

13from voluptuous import Schema 

14from curator import exceptions 

15from curator.defaults import settings 

16from curator.validators import SchemaCheck, actions, filters, options 

17from curator._version import __version__ 

18LOGGER = logging.getLogger(__name__) 

19 

20def read_file(myfile): 

21 """ 

22 Read a file and return the resulting data. 

23 

24 :arg myfile: A file to read. 

25 :rtype: str 

26 """ 

27 try: 

28 with open(myfile, 'r') as fhandle: 

29 data = fhandle.read() 

30 return data 

31 except IOError: 

32 raise exceptions.FailedExecution( 

33 'Unable to read file {0}'.format(myfile) 

34 ) 

35 

def get_yaml(path):
    """
    Read the file identified by `path` and import its YAML contents.

    :arg path: The path to a YAML configuration file.
    :rtype: dict
    """
    # Set the stage here to parse single scalar value environment vars from
    # the YAML file being read
    single = re.compile(r'^\$\{(.*)\}$')
    yaml.add_implicit_resolver("!single", single)
    def single_constructor(loader, node):
        value = loader.construct_scalar(node)
        proto = single.match(value).group(1)
        default = None
        if len(proto.split(':')) > 1:
            envvar, default = proto.split(':')
        else:
            envvar = proto
        return os.environ[envvar] if envvar in os.environ else default

    yaml.add_constructor('!single', single_constructor)

    try:
        return yaml.load(read_file(path), Loader=yaml.FullLoader)
    except yaml.scanner.ScannerError as err:
        print('Unable to read/parse YAML file: {0}'.format(path))
        print(err)
        sys.exit(1)
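# Illustration (not part of the original module): with the `!single` resolver
# above, a scalar like `${ES_HOST:127.0.0.1}` resolves to the ES_HOST
# environment variable when it is set, and to the default after the colon
# otherwise. The file name and variable below are hypothetical.
#
#   # client.yml contains:  hosts: ${ES_HOST:127.0.0.1}
#   >>> os.environ['ES_HOST'] = '10.0.0.5'
#   >>> get_yaml('client.yml')['hosts']
#   '10.0.0.5'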

def test_client_options(config):
    """
    Test whether SSL/TLS files exist. Will raise an exception if the files
    cannot be read.

    :arg config: A client configuration file data dictionary
    :rtype: None
    """
    if config['use_ssl']:
        # Test whether certificate is a valid file path
        if 'certificate' in config and config['certificate']:
            read_file(config['certificate'])
        # Test whether client_cert is a valid file path
        if 'client_cert' in config and config['client_cert']:
            read_file(config['client_cert'])
        # Test whether client_key is a valid file path
        if 'client_key' in config and config['client_key']:
            read_file(config['client_key'])

def rollable_alias(client, alias):
    """
    Ensure that `alias` is an alias, and points to an index that can use the
    ``_rollover`` API.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg alias: An Elasticsearch alias
    """
    try:
        response = client.indices.get_alias(name=alias)
    except elasticsearch7.exceptions.NotFoundError:
        LOGGER.error('alias "{0}" not found.'.format(alias))
        return False
    # Response should be like:
    # {'there_should_be_only_one': {u'aliases': {'value of "alias" here': {}}}}
    # Where 'there_should_be_only_one' is a single index name that ends in a
    # number, and 'value of "alias" here' reflects the value of the passed
    # parameter, except in versions 6.5.0+ where the ``is_write_index`` setting
    # makes it possible to have more than one index associated with a rollover
    # alias
    if get_version(client) >= (6, 5, 0):
        for idx in response:
            if 'is_write_index' in response[idx]['aliases'][alias]:
                if response[idx]['aliases'][alias]['is_write_index']:
                    return True
    # implied `else` here: if the version is not 6.5.0+, or no index has
    # `is_write_index` set, the alias has to fit the following criteria:
    if len(response) > 1:
        LOGGER.error(
            '"alias" must only reference one index: {0}'.format(response))
    else:
        index = list(response.keys())[0]
        rollable = False
        # In order for `rollable` to be True, the last 2 characters of the
        # index name must be digits, or a hyphen followed by a digit.
        # NOTE: This is not a guarantee that the rest of the index name is
        # necessarily correctly formatted.
        if index[-2:][1].isdigit():
            if index[-2:][0].isdigit():
                rollable = True
            elif index[-2:][0] == '-':
                rollable = True
        return rollable
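# Illustration (not part of the original module): index names like
# 'logs-000001' or 'logs-1' satisfy the suffix check above, while 'logs' or
# 'logs-a' do not. The index names are hypothetical.
#
#   'logs-000001'[-2:]  ->  '01'  (digit, digit)          -> rollable
#   'logs-1'[-2:]       ->  '-1'  (hyphen, digit)         -> rollable
#   'logs-a'[-2:]       ->  '-a'  (last char not a digit) -> not rollable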

def verify_client_object(test):
    """
    Test if `test` is a proper :class:`elasticsearch7.Elasticsearch` client
    object and raise an exception if it is not.

    :arg test: The variable or object to test
    :rtype: None
    """
    # Ignore mock type for testing
    if str(type(test)) == "<class 'mock.Mock'>" or \
            str(type(test)) == "<class 'mock.mock.Mock'>":
        pass
    elif not isinstance(test, elasticsearch7.Elasticsearch):
        raise TypeError(
            'Not a client object. Type: {0}'.format(type(test))
        )

def verify_index_list(test):
    """
    Test if `test` is a proper :class:`curator.indexlist.IndexList` object and
    raise an exception if it is not.

    :arg test: The variable or object to test
    :rtype: None
    """
    # It breaks if this import isn't local to this function
    from .indexlist import IndexList
    if not isinstance(test, IndexList):
        raise TypeError(
            'Not an IndexList object. Type: {0}.'.format(type(test))
        )

def verify_snapshot_list(test):
    """
    Test if `test` is a proper :class:`curator.snapshotlist.SnapshotList`
    object and raise an exception if it is not.

    :arg test: The variable or object to test
    :rtype: None
    """
    # It breaks if this import isn't local to this function
    from .snapshotlist import SnapshotList
    if not isinstance(test, SnapshotList):
        raise TypeError(
            'Not a SnapshotList object. Type: {0}.'.format(type(test))
        )

def report_failure(exception):
    """
    Raise an `exceptions.FailedExecution` exception and include the original
    error message.

    :arg exception: The upstream exception.
    :rtype: None
    """
    raise exceptions.FailedExecution(
        'Exception encountered. Rerun with loglevel DEBUG and/or check '
        'Elasticsearch logs for more information. '
        'Exception: {0}'.format(exception)
    )

def get_date_regex(timestring):
    """
    Return a regex string based on a provided strftime timestring.

    :arg timestring: An strftime pattern
    :rtype: str
    """
    prev, regex = ('', '')
    LOGGER.debug('Provided timestring = "{0}"'.format(timestring))
    for idx, char in enumerate(timestring):
        LOGGER.debug('Current character: {0} Array position: {1}'.format(char, idx))
        if char == '%':
            pass
        elif char in settings.date_regex() and prev == '%':
            regex += r'\d{' + settings.date_regex()[char] + '}'
        elif char in ['.', '-']:
            regex += "\\" + char
        else:
            regex += char
        prev = char
    LOGGER.debug("regex = {0}".format(regex))
    return regex
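# Illustration (not part of the original module): assuming
# settings.date_regex() maps 'Y' to '4' and 'm'/'d' to '2' (digit counts),
# a timestring renders to a digit-matching pattern with escaped separators:
#
#   get_date_regex('%Y.%m.%d')  ->  r'\d{4}\.\d{2}\.\d{2}'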

def get_datetime(index_timestamp, timestring):
    """
    Return the datetime extracted from the index name, which is the index
    creation time.

    :arg index_timestamp: The timestamp extracted from an index name
    :arg timestring: An strftime pattern
    :rtype: :py:class:`datetime.datetime`
    """
    # Compensate for week of year by appending '%w' to the timestring
    # and '1' (Monday) to index_timestamp
    iso_week_number = False
    if '%W' in timestring or '%U' in timestring or '%V' in timestring:
        timestring += '%w'
        index_timestamp += '1'
        if '%V' in timestring and '%G' in timestring:
            iso_week_number = True
            # Fake it so we read the Gregorian format instead. We will
            # process it later
            timestring = timestring.replace("%G", "%Y").replace("%V", "%W")
    elif '%m' in timestring:
        if not '%d' in timestring:
            timestring += '%d'
            index_timestamp += '1'

    date = datetime.strptime(index_timestamp, timestring)

    # Handle ISO time string
    if iso_week_number:
        date = _handle_iso_week_number(date, timestring, index_timestamp)

    return date
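# Illustration (not part of the original module): a month-only timestring is
# padded with '%d'/'1' above, so the result lands on the first of the month:
#
#   >>> get_datetime('2023.08', '%Y.%m')
#   datetime.datetime(2023, 8, 1, 0, 0)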

def fix_epoch(epoch):
    """
    Fix value of `epoch` to be epoch seconds, which should be 10 or fewer
    digits long.

    :arg epoch: An epoch timestamp, in seconds, milliseconds, microseconds,
        or even nanoseconds.
    :rtype: int
    """
    try:
        # No decimals allowed
        epoch = int(epoch)
    except Exception as err:
        raise ValueError(
            'Invalid epoch received, unable to convert {0} to int. {1}'.format(epoch, err))

    # If we're still using this script past January, 2038, we have bigger
    # problems than my hacky math here...
    if len(str(epoch)) <= 10:
        # Epoch is fine, no changes
        pass
    elif len(str(epoch)) > 10 and len(str(epoch)) <= 13:
        epoch = int(epoch/1000)
    else:
        orders_of_magnitude = len(str(epoch)) - 10
        powers_of_ten = 10**orders_of_magnitude
        epoch = int(epoch/powers_of_ten)
    return epoch
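# Illustration (not part of the original module): inputs of any precision
# are truncated to 10-digit epoch seconds:
#
#   >>> fix_epoch(1692200000)           # seconds: unchanged
#   1692200000
#   >>> fix_epoch(1692200000123)        # milliseconds (13 digits)
#   1692200000
#   >>> fix_epoch(1692200000123456789)  # nanoseconds (19 digits)
#   1692200000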

def _handle_iso_week_number(date, timestring, index_timestamp):
    date_iso = date.isocalendar()
    iso_week_str = "{Y:04d}{W:02d}".format(Y=date_iso[0], W=date_iso[1])
    greg_week_str = datetime.strftime(date, "%Y%W")

    # Edge case 1: ISO week number is bigger than the Gregorian week number.
    # Ex: year 2014, all ISO week numbers were 1 more than in Gregorian.
    if (iso_week_str > greg_week_str or
            # Edge case 2: 2010-01-01 in ISO: 2009.W53, in Gregorian: 2010.W00
            # For Gregorian, converting 2009.W53 gives 2010-01-04; converting
            # back to the same timestring gives: 2010.W01.
            datetime.strftime(date, timestring) != index_timestamp):

        # Remove one week in this case
        date = date - timedelta(days=7)
    return date

def datetime_to_epoch(mydate):
    """Convert a datetime into epoch seconds"""
    # I would have used `total_seconds`, but apparently that's new
    # to Python 2.7+, and due to so many people still using
    # RHEL/CentOS 6, I need this to support Python 2.6.
    tdelta = (mydate - datetime(1970, 1, 1))
    return tdelta.seconds + tdelta.days * 24 * 3600

class TimestringSearch(object):
    """
    An object to allow repetitive search against a string, `searchme`, without
    having to repeatedly recreate the regex.

    :arg timestring: An strftime pattern
    """
    def __init__(self, timestring):
        regex = r'(?P<date>{0})'.format(get_date_regex(timestring))
        self.pattern = re.compile(regex)
        self.timestring = timestring

    def get_epoch(self, searchme):
        """
        Return the epoch timestamp extracted from the `timestring` appearing in
        `searchme`.

        :arg searchme: A string to be searched for a date pattern that matches
            `timestring`
        :rtype: int
        """
        match = self.pattern.search(searchme)
        if match:
            if match.group("date"):
                timestamp = match.group("date")
                return datetime_to_epoch(
                    get_datetime(timestamp, self.timestring)
                )
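# Illustration (not part of the original module): compile the pattern once,
# then search many index names; the index name here is hypothetical and the
# result assumes the date_regex mapping shown earlier:
#
#   >>> tss = TimestringSearch('%Y.%m.%d')
#   >>> tss.get_epoch('logstash-2023.08.16')
#   1692144000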

def get_point_of_reference(unit, count, epoch=None):
    """
    Get a point-of-reference timestamp in epoch seconds by deriving it
    from a `unit` and a `count`, and an optional reference timestamp, `epoch`

    :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``, ``weeks``,
        ``months``, or ``years``.
    :arg count: The number of ``unit`` (s). ``count`` * ``unit`` will
        be calculated out to the relative number of seconds.
    :arg epoch: An epoch timestamp used in conjunction with ``unit`` and
        ``count`` to establish a point of reference for calculations.
    :rtype: int
    """
    if unit == 'seconds':
        multiplier = 1
    elif unit == 'minutes':
        multiplier = 60
    elif unit == 'hours':
        multiplier = 3600
    elif unit == 'days':
        multiplier = 3600*24
    elif unit == 'weeks':
        multiplier = 3600*24*7
    elif unit == 'months':
        multiplier = 3600*24*30
    elif unit == 'years':
        multiplier = 3600*24*365
    else:
        raise ValueError('Invalid unit: {0}.'.format(unit))
    # Use this moment as a reference point, if one is not provided.
    if not epoch:
        epoch = time.time()
    epoch = fix_epoch(epoch)
    return epoch - multiplier * count
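# Illustration (not part of the original module): three days before a
# reference epoch is the reference minus 3 * 86400 seconds:
#
#   >>> get_point_of_reference('days', 3, epoch=1692144000)
#   1691884800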

def get_unit_count_from_name(index_name, pattern):
    """Derive the unit_count from the index name"""
    if pattern is None:
        return None
    match = pattern.search(index_name)
    if match:
        try:
            return int(match.group(1))
        except Exception:
            return None
    else:
        return None

def date_range(unit, range_from, range_to, epoch=None, week_starts_on='sunday'):
    """
    Get the epoch start time and end time of a range of ``unit``s, reckoning the
    start of the week (if that's the selected unit) based on ``week_starts_on``,
    which can be either ``sunday`` or ``monday``.

    :arg unit: One of ``hours``, ``days``, ``weeks``, ``months``, or ``years``.
    :arg range_from: How many ``unit`` (s) in the past/future is the origin?
    :arg range_to: How many ``unit`` (s) in the past/future is the end point?
    :arg epoch: An epoch timestamp used to establish a point of reference for
        calculations.
    :arg week_starts_on: Either ``sunday`` or ``monday``. Default is ``sunday``
    :rtype: tuple
    """
    acceptable_units = ['hours', 'days', 'weeks', 'months', 'years']
    if unit not in acceptable_units:
        raise exceptions.ConfigurationError(
            '"unit" must be one of: {0}'.format(acceptable_units))
    if not range_to >= range_from:
        raise exceptions.ConfigurationError(
            '"range_to" must be greater than or equal to "range_from"')
    if not epoch:
        epoch = time.time()
    epoch = fix_epoch(epoch)
    raw_point_of_ref = datetime.utcfromtimestamp(epoch)
    LOGGER.debug('Raw point of Reference = {0}'.format(raw_point_of_ref))
    # Reverse the polarity, because -1 as last week makes sense when read by
    # humans, but datetime timedelta math makes -1 in the future.
    origin = range_from * -1
    # These if statements help get the start date or start_delta
    if unit == 'hours':
        point_of_ref = datetime(
            raw_point_of_ref.year, raw_point_of_ref.month, raw_point_of_ref.day,
            raw_point_of_ref.hour, 0, 0
        )
        start_delta = timedelta(hours=origin)
    if unit == 'days':
        point_of_ref = datetime(
            raw_point_of_ref.year, raw_point_of_ref.month,
            raw_point_of_ref.day, 0, 0, 0
        )
        start_delta = timedelta(days=origin)
    if unit == 'weeks':
        point_of_ref = datetime(
            raw_point_of_ref.year, raw_point_of_ref.month, raw_point_of_ref.day, 0, 0, 0)
        sunday = False
        if week_starts_on.lower() == 'sunday':
            sunday = True
        weekday = point_of_ref.weekday()
        # Compensate for ISO week starting on Monday by default
        if sunday:
            weekday += 1
        LOGGER.debug('Weekday = {0}'.format(weekday))
        start_delta = timedelta(days=weekday, weeks=origin)
    if unit == 'months':
        point_of_ref = datetime(raw_point_of_ref.year, raw_point_of_ref.month, 1, 0, 0, 0)
        year = raw_point_of_ref.year
        month = raw_point_of_ref.month
        if origin > 0:
            for _ in range(0, origin):
                if month == 1:
                    year -= 1
                    month = 12
                else:
                    month -= 1
        else:
            for _ in range(origin, 0):
                if month == 12:
                    year += 1
                    month = 1
                else:
                    month += 1
        start_date = datetime(year, month, 1, 0, 0, 0)
    if unit == 'years':
        point_of_ref = datetime(raw_point_of_ref.year, 1, 1, 0, 0, 0)
        start_date = datetime(raw_point_of_ref.year - origin, 1, 1, 0, 0, 0)
    if unit not in ['months', 'years']:
        start_date = point_of_ref - start_delta
    # By this point, we know our start date and can convert it to epoch time
    start_epoch = datetime_to_epoch(start_date)
    LOGGER.debug('Start ISO8601 = {0}'.format(datetime.utcfromtimestamp(start_epoch).isoformat()))
    # This is the number of units we need to consider.
    count = (range_to - range_from) + 1
    # We have to iterate to one more month, and then subtract a second to get
    # the last day of the correct month
    if unit == 'months':
        month = start_date.month
        year = start_date.year
        for _ in range(0, count):
            if month == 12:
                year += 1
                month = 1
            else:
                month += 1
        end_date = datetime(year, month, 1, 0, 0, 0)
        end_epoch = datetime_to_epoch(end_date) - 1
    # Similarly, with years, we need to get the last moment of the year
    elif unit == 'years':
        end_date = datetime((raw_point_of_ref.year - origin) + count, 1, 1, 0, 0, 0)
        end_epoch = datetime_to_epoch(end_date) - 1
    # It's not months or years, which have inconsistent reckoning...
    else:
        # This lets us use an existing method to simply add unit * count seconds
        # to get hours, days, or weeks, as they don't change
        end_epoch = get_point_of_reference(
            unit, count * -1, epoch=start_epoch) - 1
    LOGGER.debug('End ISO8601 = {0}'.format(
        datetime.utcfromtimestamp(end_epoch).isoformat()))
    return (start_epoch, end_epoch)
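# Illustration (not part of the original module): with a reference epoch of
# Wednesday 2023-08-16 15:36 UTC, "last week" (-1 to -1, Sunday start) spans
# the whole prior Sunday-to-Saturday week:
#
#   >>> start, end = date_range('weeks', -1, -1, epoch=1692200160)
#   >>> datetime.utcfromtimestamp(start).isoformat()
#   '2023-08-06T00:00:00'
#   >>> datetime.utcfromtimestamp(end).isoformat()
#   '2023-08-12T23:59:59'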

def absolute_date_range(
        unit, date_from, date_to,
        date_from_format=None, date_to_format=None
):
    """
    Get the epoch start time and end time of an absolute range of ``unit``s,
    bounded by ``date_from`` and ``date_to``.

    :arg unit: One of ``seconds``, ``minutes``, ``hours``, ``days``, ``weeks``,
        ``months``, or ``years``.
    :arg date_from: The simplified date for the start of the range
    :arg date_to: The simplified date for the end of the range. If this value
        is the same as ``date_from``, the full value of ``unit`` will be
        extrapolated for the range. For example, if ``unit`` is ``months``,
        and ``date_from`` and ``date_to`` are both ``2017.01``, then the entire
        month of January 2017 will be the absolute date range.
    :arg date_from_format: The strftime string used to parse ``date_from``
    :arg date_to_format: The strftime string used to parse ``date_to``
    :rtype: tuple
    """
    acceptable_units = ['seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years']
    if unit not in acceptable_units:
        raise exceptions.ConfigurationError(
            '"unit" must be one of: {0}'.format(acceptable_units))
    if not date_from_format or not date_to_format:
        raise exceptions.ConfigurationError('Must provide "date_from_format" and "date_to_format"')
    try:
        start_epoch = datetime_to_epoch(get_datetime(date_from, date_from_format))
        LOGGER.debug(
            'Start ISO8601 = {0}'.format(datetime.utcfromtimestamp(start_epoch).isoformat()))
    except Exception as err:
        raise exceptions.ConfigurationError(
            'Unable to parse "date_from" {0} and "date_from_format" {1}. '
            'Error: {2}'.format(date_from, date_from_format, err)
        )
    try:
        end_date = get_datetime(date_to, date_to_format)
    except Exception as err:
        raise exceptions.ConfigurationError(
            'Unable to parse "date_to" {0} and "date_to_format" {1}. '
            'Error: {2}'.format(date_to, date_to_format, err)
        )
    # We have to iterate to one more month, and then subtract a second to get
    # the last day of the correct month
    if unit == 'months':
        month = end_date.month
        year = end_date.year
        if month == 12:
            year += 1
            month = 1
        else:
            month += 1
        new_end_date = datetime(year, month, 1, 0, 0, 0)
        end_epoch = datetime_to_epoch(new_end_date) - 1
    # Similarly, with years, we need to get the last moment of the year
    elif unit == 'years':
        new_end_date = datetime(end_date.year + 1, 1, 1, 0, 0, 0)
        end_epoch = datetime_to_epoch(new_end_date) - 1
    # It's not months or years, which have inconsistent reckoning...
    else:
        # This lets us use an existing method to simply add 1 more unit's worth
        # of seconds to get hours, days, or weeks, as they don't change.
        # We use -1 because get_point_of_reference normally subtracts from the
        # epoch and we need to add to it, so we make it subtract a negative
        # value. Then, as before, subtract 1 to get the end of the period.
        end_epoch = get_point_of_reference(
            unit, -1, epoch=datetime_to_epoch(end_date)) - 1

    LOGGER.debug('End ISO8601 = {0}'.format(
        datetime.utcfromtimestamp(end_epoch).isoformat()))
    return (start_epoch, end_epoch)
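# Illustration (not part of the original module): the docstring example above,
# rendered as epoch bounds covering the whole of January 2017 (UTC):
#
#   >>> start, end = absolute_date_range('months', '2017.01', '2017.01',
#   ...                                  date_from_format='%Y.%m',
#   ...                                  date_to_format='%Y.%m')
#   >>> datetime.utcfromtimestamp(start).isoformat()
#   '2017-01-01T00:00:00'
#   >>> datetime.utcfromtimestamp(end).isoformat()
#   '2017-01-31T23:59:59'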

def byte_size(num, suffix='B'):
    """
    Return a formatted string indicating the size in bytes, with the proper
    unit, e.g. KB, MB, GB, TB, etc.

    :arg num: The number of bytes
    :arg suffix: An arbitrary suffix, like `Bytes`
    :rtype: str
    """
    for unit in ['', 'K', 'M', 'G', 'T', 'P', 'E', 'Z']:
        if abs(num) < 1024.0:
            return "%3.1f%s%s" % (num, unit, suffix)
        num /= 1024.0
    return "%.1f%s%s" % (num, 'Y', suffix)
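# Illustration (not part of the original module): the value is divided down
# by 1024 until it fits, then formatted with one decimal place:
#
#   >>> byte_size(1234567)
#   '1.2MB'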

def ensure_list(indices):
    """
    Return a list, even if indices is a single value

    :arg indices: A list of indices to act upon
    :rtype: list
    """
    if not isinstance(indices, list):  # in case of a single value passed
        indices = [indices]
    return indices

def to_csv(indices):
    """
    Return a csv string from a list of indices, or a single value if only one
    value is present

    :arg indices: A list of indices to act on, or a single value, which could be
        in the format of a csv string already.
    :rtype: str
    """
    indices = ensure_list(indices)  # in case of a single value passed
    if indices:
        return ','.join(sorted(indices))
    else:
        return None

def check_csv(value):
    """
    Some of the curator methods should not operate against multiple indices at
    once. This method can be used to check if a list or csv has been sent.

    :arg value: The value to test, if list or csv string
    :rtype: bool
    """
    if isinstance(value, list):
        return True
    # Python3 hack because it doesn't recognize unicode as a type anymore
    if sys.version_info < (3, 0):
        # pylint: disable=E0602
        if isinstance(value, unicode):
            value = str(value)
    if isinstance(value, str):
        if len(value.split(',')) > 1:  # It's a csv string.
            return True
        else:  # There's only one value here, so it's not a csv string
            return False
    else:
        raise TypeError(
            'Passed value: {0} is not a list or a string '
            'but is of type {1}'.format(value, type(value))
        )

def chunk_index_list(indices):
    """
    This utility chunks very large index lists into 3KB chunks.
    It measures the size as a csv string, then converts back into a list
    for the return value.

    :arg indices: A list of indices to act on.
    :rtype: list
    """
    chunks = []
    chunk = ""
    for index in indices:
        if len(chunk) < 3072:
            if not chunk:
                chunk = index
            else:
                chunk += "," + index
        else:
            chunks.append(chunk.split(','))
            chunk = index
    chunks.append(chunk.split(','))
    return chunks
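# Illustration (not part of the original module): the return value is a list
# of lists, each of whose csv form stays near the 3KB URL-length budget. With
# 1000 ten-character names, each batch holds 280 names (3079 csv bytes), so:
#
#   >>> names = ['index-{0:04d}'.format(i) for i in range(1000)]
#   >>> len(chunk_index_list(names))
#   4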

def get_indices(client):
    """
    Get the current list of indices from the cluster.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: list
    """
    try:
        indices = list(
            client.indices.get_settings(index='_all', params={'expand_wildcards': 'open,closed'})
        )
        version_number = get_version(client)
        LOGGER.debug(
            'Detected Elasticsearch version '
            '{0}'.format(".".join(map(str, version_number)))
        )
        LOGGER.debug("All indices: {0}".format(indices))
        return indices
    except Exception as err:
        raise exceptions.FailedExecution('Failed to get indices. Error: {0}'.format(err))

def get_version(client):
    """
    Return the ES version number as a tuple.
    Omits trailing tags like -dev, or Beta

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: tuple
    """
    version = client.info()['version']['number']
    version = version.split('-')[0]
    if len(version.split('.')) > 3:
        version = version.split('.')[:-1]
    else:
        version = version.split('.')
    return tuple(map(int, version))
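# Illustration (not part of the original module): trailing tags and extra
# dotted segments are dropped before the tuple conversion:
#
#   '7.10.2-SNAPSHOT'  ->  (7, 10, 2)
#   '5.6.16.1'         ->  (5, 6, 16)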

def is_master_node(client):
    """
    Return `True` if the connected client node is the elected master node in
    the Elasticsearch cluster, otherwise return `False`.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: bool
    """
    my_node_id = list(client.nodes.info('_local')['nodes'])[0]
    master_node_id = client.cluster.state(metric='master_node')['master_node']
    return my_node_id == master_node_id

def check_version(client):
    """
    Verify version is within acceptable range. Raise an exception if it is not.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: None
    """
    version_number = get_version(client)
    LOGGER.debug('Detected Elasticsearch version {0}'.format(".".join(map(str, version_number))))
    if version_number >= settings.version_max() \
            or version_number < settings.version_min():
        msg = (
            'Elasticsearch version {0} incompatible with this version of Curator '
            '({1})'.format(".".join(map(str, version_number)), __version__)
        )
        LOGGER.error(msg)
        raise exceptions.CuratorException(msg)

def check_master(client, master_only=False):
    """
    Check if the connected client is the elected master node of the cluster.
    If not, cleanly exit with a log message.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: None
    """
    if master_only and not is_master_node(client):
        LOGGER.info(
            'Master-only flag detected. '
            'Connected to non-master node. Aborting.'
        )
        sys.exit(0)

def process_url_prefix_arg(data):
    """Test for and validate the ``url_prefix`` setting"""
    if 'url_prefix' in data:
        if (data['url_prefix'] is None or data['url_prefix'] == "None"):
            data['url_prefix'] = ''
    return data

def process_host_args(data):
    """
    Check for ``host`` and ``hosts`` in the provided dictionary.
    Raise an exception if both ``host`` and ``hosts`` are present.
    If ``host`` is used, replace that with ``hosts``.
    """
    if 'host' in data and 'hosts' in data:
        raise exceptions.ConfigurationError(
            'Both "host" and "hosts" are defined. Pick only one.')
    elif 'host' in data and 'hosts' not in data:
        data['hosts'] = data['host']
        del data['host']
    data['hosts'] = '127.0.0.1' if 'hosts' not in data else data['hosts']
    data['hosts'] = ensure_list(data['hosts'])
    return data

def process_x_api_key_arg(data):
    """Test for arg and set x-api-key header if present"""
    api_key = data.pop('api_key', False)
    if api_key:
        data['headers'] = {'x-api-key': api_key}
    return data

def process_master_only_arg(data):
    """
    Test whether there are multiple hosts and ``master_only`` is ``True``

    Return the data/kwargs minus the ``master_only`` key/value pair if the
    test passes. Otherwise, raise an exception.
    """
    master_only = data.pop('master_only', False)
    if master_only:
        if len(data['hosts']) > 1:
            msg = (
                '"master_only" cannot be true if more than one host is '
                'specified. Hosts = {0}'.format(data['hosts'])
            )
            LOGGER.error(msg)
            raise exceptions.ConfigurationError(msg)
    return data, master_only

def process_auth_args(data):
    """
    Return a valid http_auth tuple for authentication in the
    elasticsearch7.Elasticsearch client object
    """
    http_auth = data['http_auth'] if 'http_auth' in data else None
    username = data.pop('username', False)
    password = data.pop('password', False)
    if http_auth:
        # No change to http_auth
        LOGGER.warning(
            'Use of "http_auth" is deprecated. Please use "username" and "password" instead.')
    elif username and password:
        http_auth = (username, password)
    elif not username and password:
        LOGGER.error('Password provided without username.')
        LOGGER.fatal('Curator cannot proceed. Exiting.')
        raise exceptions.ClientException
    elif username and not password:
        LOGGER.error('Username provided without password.')
        LOGGER.fatal('Curator cannot proceed. Exiting.')
        raise exceptions.ClientException
    # else all are empty or None, so no worries. Return as-is
    data['http_auth'] = http_auth
    return data

def isbase64(data):
    """Return `True` if `data` survives a base64 decode/encode round trip."""
    try:
        return base64.b64encode(base64.b64decode(data)).decode() == data
    except Exception:
        return False

def process_apikey_auth_args(data):
    """
    Return a valid api_key base64 token for API Key authentication in the
    elasticsearch7.Elasticsearch client object
    """
    api_key = data.pop('apikey_auth', None)
    if api_key and not isbase64(api_key):
        LOGGER.error('apikey_auth must be base64 encoded.')
        LOGGER.fatal('Curator cannot proceed. Exiting.')
        raise exceptions.ConfigurationError

    data['api_key'] = api_key
    return data
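# Illustration (not part of the original module): the round trip only
# succeeds for valid base64 input. 'aWQ6YXBpX2tleQ==' is 'id:api_key'
# encoded, matching the `id:api_key` format the apikey_auth docs describe:
#
#   >>> isbase64('aWQ6YXBpX2tleQ==')
#   True
#   >>> isbase64('id:api_key')
#   False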

def process_ssl_args(data):
    """Populate and validate the proper SSL args in data and return it"""
    data['use_ssl'] = False if 'use_ssl' not in data else data['use_ssl']
    data['ssl_no_validate'] = False if 'ssl_no_validate' not in data \
        else data['ssl_no_validate']
    data['certificate'] = False if 'certificate' not in data \
        else data['certificate']
    data['client_cert'] = False if 'client_cert' not in data \
        else data['client_cert']
    data['client_key'] = False if 'client_key' not in data \
        else data['client_key']
    if data['use_ssl']:
        if data['ssl_no_validate']:
            data['verify_certs'] = False  # Not needed, but explicitly defined
        else:
            LOGGER.debug('Attempting to verify SSL certificate.')
            # If user provides a certificate:
            if data['certificate']:
                data['verify_certs'] = True
                data['ca_certs'] = data['certificate']
            else:  # Try to use bundled certifi certificates
                if getattr(sys, 'frozen', False):
                    # The application is frozen (compiled)
                    datadir = os.path.dirname(sys.executable)
                    data['verify_certs'] = True
                    data['ca_certs'] = os.path.join(datadir, 'cacert.pem')
                else:
                    # Use certifi certificates via certifi.where():
                    import certifi
                    data['verify_certs'] = True
                    data['ca_certs'] = certifi.where()
    return data

def process_aws_args(data):
    """Process all AWS client args. Raise exceptions if they are incomplete"""
    data['aws_key'] = False if 'aws_key' not in data else data['aws_key']
    data['aws_secret_key'] = False if 'aws_secret_key' not in data else data['aws_secret_key']
    data['aws_token'] = '' if 'aws_token' not in data else data['aws_token']
    data['aws_sign_request'] = False if 'aws_sign_request' not in data \
        else data['aws_sign_request']
    data['aws_region'] = False if 'aws_region' not in data \
        else data['aws_region']
    if data['aws_key'] or data['aws_secret_key'] or data['aws_sign_request']:
        if not data['aws_region']:
            raise exceptions.MissingArgument('Missing "aws_region".')
    if data['aws_key'] or data['aws_secret_key']:
        if not (data['aws_key'] and data['aws_secret_key']):
            raise exceptions.MissingArgument('Missing AWS Access Key or AWS Secret Key')
    return data

def try_boto_session(data):
    """Try to obtain AWS credentials using boto"""
    if data['aws_sign_request']:
        try:
            from boto3 import session
            from botocore import exceptions as botoex
        # We cannot get credentials without the boto3 library, so we cannot continue
        except ImportError as err:
            LOGGER.error('Unable to sign AWS requests. Failed to import a module: {0}'.format(err))
            raise ImportError('Failed to import a module: {0}'.format(err))
        try:
            if 'aws_region' in data:
                boto_session = session.Session(region_name=data['aws_region'])
            else:
                boto_session = session.Session()
            credentials = boto_session.get_credentials()
            data['aws_key'] = credentials.access_key
            data['aws_secret_key'] = credentials.secret_key
            data['aws_token'] = credentials.token
        # If an attribute doesn't exist, we were not able to retrieve credentials
        # as expected so we can't continue
        except AttributeError:
            LOGGER.debug('Unable to locate AWS credentials')
            raise botoex.NoCredentialsError
    return data

def try_aws_auth(data):
    """Set ``data`` with AWS credentials and the requisite SSL flags if detected"""
    LOGGER.debug('Checking for AWS settings')
    has_requests_module = False
    try:
        from requests_aws4auth import AWS4Auth
        has_requests_module = True
    except ImportError:
        LOGGER.debug('Not using "requests_aws4auth" python module to connect.')
    if has_requests_module:
        if data['aws_key']:
            LOGGER.info('Configuring client to connect to AWS endpoint')
            # Override these key values
            data['use_ssl'] = True
            data['verify_certs'] = True
            if data['ssl_no_validate']:
                data['verify_certs'] = False
            data['http_auth'] = (
                AWS4Auth(
                    data['aws_key'], data['aws_secret_key'],
                    data['aws_region'], 'es', session_token=data['aws_token'])
            )
    return data

def do_version_check(client, skip):
    """
    Do a test of the Elasticsearch version, unless ``skip`` is ``True``
    """
    if skip:
        LOGGER.warning(
            'Skipping Elasticsearch version verification. This is '
            'acceptable for remote reindex operations.'
        )
    else:
        LOGGER.debug('Checking Elasticsearch endpoint version...')
        try:
            # Verify the version is acceptable.
            check_version(client)
        except exceptions.CuratorException as err:
            LOGGER.error('{0}'.format(err))
            LOGGER.fatal('Curator cannot continue due to version incompatibilities. Exiting')
            raise exceptions.ClientException

def verify_master_status(client, master_only):
    """
    Verify that the client is connected to the elected master node.
    Raise an exception if it is not.
    """
    # Verify "master_only" status, if applicable
    if master_only:
        LOGGER.info('Connecting only to local master node...')
        try:
            check_master(client, master_only=master_only)
        except exceptions.ConfigurationError as err:
            LOGGER.error('master_only check failed: {0}'.format(err))
            LOGGER.fatal('Curator cannot continue. Exiting.')
            raise exceptions.ClientException
    else:
        LOGGER.debug('Not verifying local master status (master_only: false)')

def get_client(**kwargs):
    """
    NOTE: AWS IAM parameters `aws_sign_request` and `aws_region` are
    provided to facilitate request signing. The credentials will be
    fetched from the local environment as per the AWS documentation:
    http://amzn.to/2fRCGCt

    AWS IAM parameters `aws_key`, `aws_secret_key`, and `aws_region` are
    provided for users that still have their keys included in the Curator
    config file.

    Return an :class:`elasticsearch7.Elasticsearch` client object using the
    provided parameters. Any of the keyword arguments the
    :class:`elasticsearch7.Elasticsearch` client object can receive are valid,
    such as:

    :arg hosts: A list of one or more Elasticsearch client hostnames or IP
        addresses to connect to. Can send a single host.
    :type hosts: list
    :arg port: The Elasticsearch client port to connect to.
    :type port: int
    :arg url_prefix: `Optional` url prefix, if needed to reach the Elasticsearch
        API (i.e., it's not at the root level)
    :type url_prefix: str
    :arg use_ssl: Whether to connect to the client via SSL/TLS
    :type use_ssl: bool
    :arg certificate: Path to SSL/TLS certificate
    :arg client_cert: Path to SSL/TLS client certificate (public key)
    :arg client_key: Path to SSL/TLS private key
    :arg aws_key: AWS IAM Access Key (Only used if the :mod:`requests-aws4auth`
        python module is installed)
    :arg aws_secret_key: AWS IAM Secret Access Key (Only used if the
        :mod:`requests-aws4auth` python module is installed)
    :arg aws_region: AWS Region where the cluster exists (Only used if the
        :mod:`requests-aws4auth` python module is installed)
    :arg aws_sign_request: Sign request to AWS (Only used if the
        :mod:`requests-aws4auth` and :mod:`boto3` python modules are installed)
    :arg ssl_no_validate: If `True`, do not validate the certificate
        chain. This is an insecure option and you will see warnings in the
        log output.
    :type ssl_no_validate: bool
    :arg username: HTTP basic authentication username. Ignored if ``http_auth`` is set.
    :type username: str
    :arg password: HTTP basic authentication password. Ignored if ``http_auth`` is set.
    :type password: str
    :arg http_auth: Authentication credentials in `user:pass` format.
    :type http_auth: str
    :arg timeout: Number of seconds before the client will timeout.
    :type timeout: int
    :arg master_only: If `True`, the client will `only` connect if the
        endpoint is the elected master node of the cluster. **This option does
        not work if `hosts` has more than one value.** It will raise an
        Exception in that case.
    :type master_only: bool
    :arg skip_version_test: If `True`, skip the version check as part of the
        client connection.
    :arg api_key: Value to be used in an optional X-Api-key header when
        accessing Elasticsearch
    :type api_key: str
    :arg apikey_auth: API Key authentication in `id:api_key` format, encoded
        in base64.
    :type apikey_auth: str
    :rtype: :class:`elasticsearch7.Elasticsearch`
    """
    # Walk through parsing/testing series of arguments to build the client
    skip_version_test = kwargs.pop('skip_version_test', False)
    kwargs = process_url_prefix_arg(kwargs)
    kwargs = process_host_args(kwargs)
    kwargs = process_x_api_key_arg(kwargs)
    kwargs['connection_class'] = elasticsearch7.RequestsHttpConnection
    kwargs = process_ssl_args(kwargs)
    kwargs = process_aws_args(kwargs)
    kwargs = try_boto_session(kwargs)
    kwargs = try_aws_auth(kwargs)
    kwargs, master_only = process_master_only_arg(kwargs)
    kwargs = process_auth_args(kwargs)
    kwargs = process_apikey_auth_args(kwargs)

    LOGGER.debug("kwargs = {0}".format(kwargs))
    fail = False

    try:
        # Creating the class object should be okay
        LOGGER.info('Instantiating client object')
        client = elasticsearch7.Elasticsearch(**kwargs)
        # Test client connectivity (debug log client.info() output)
        LOGGER.info('Testing client connectivity')
        LOGGER.debug('Cluster info: {0}'.format(client.info()))
        LOGGER.info('Successfully created Elasticsearch client object with provided settings')
    # Catch all TransportError types first
    except elasticsearch7.TransportError as err:
        try:
            reason = err.info['error']['reason']
        except Exception:
            reason = err.error
        LOGGER.error('HTTP {0} error: {1}'.format(err.status_code, reason))
        fail = True
    # Catch other potential exceptions
    except Exception as err:
        LOGGER.error('Unable to connect to Elasticsearch cluster. Error: {0}'.format(err))
        fail = True
    ## Failure checks
    # First level failure check
    if fail:
        LOGGER.fatal('Curator cannot proceed. Exiting.')
        raise exceptions.ClientException
    # Second level failure check: acceptable version
    do_version_check(client, skip_version_test)
    # Third level failure check: master_only
    verify_master_status(client, master_only)
    return client
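# Illustration (not part of the original module): a minimal connection using
# the documented kwargs; host, credentials, and cluster name are hypothetical
# and assume a reachable cluster:
#
#   >>> client = get_client(hosts='127.0.0.1', port=9200,
#   ...                     username='curator', password='secret',
#   ...                     timeout=30)
#   >>> client.info()['cluster_name']
#   'my-cluster'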

def show_dry_run(ilo, action, **kwargs):
    """
    Log dry run output with the action which would have been executed.

    :arg ilo: A :class:`curator.indexlist.IndexList`
    :arg action: The `action` to be performed.
    :arg kwargs: Any other args to show in the log output
    """
    LOGGER.info('DRY-RUN MODE. No changes will be made.')
    LOGGER.info(
        '(CLOSED) indices may be shown that may not be acted on by action "{0}".'.format(action)
    )
    indices = sorted(ilo.indices)
    for idx in indices:
        index_closed = ilo.index_info[idx]['state'] == 'close'
        LOGGER.info(
            'DRY-RUN: {0}: {1}{2} with arguments: '
            '{3}'.format(action, idx, ' (CLOSED)' if index_closed else '', kwargs)
        )

### SNAPSHOT STUFF ###
def get_repository(client, repository=''):
    """
    Return configuration information for the indicated repository.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :rtype: dict
    """
    try:
        return client.snapshot.get_repository(repository=repository)
    except (elasticsearch7.TransportError, elasticsearch7.NotFoundError) as err:
        raise exceptions.CuratorException(
            'Unable to get repository {0}. Response Code: {1} Error: {2} Check Elasticsearch '
            'logs for more information.'.format(repository, err.status_code, err.error)
        )

def get_snapshot(client, repository=None, snapshot=''):
    """
    Return information about a snapshot (or a comma-separated list of snapshots)
    If no snapshot is specified, it will return all snapshots. If none exist, an
    empty dictionary will be returned.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :arg snapshot: The snapshot name, or a comma-separated list of snapshots
    :rtype: dict
    """
    if not repository:
        raise exceptions.MissingArgument('No value for "repository" provided')
    snapname = '_all' if snapshot == '' else snapshot
    try:
        return client.snapshot.get(repository=repository, snapshot=snapname)
    except (elasticsearch7.TransportError, elasticsearch7.NotFoundError) as err:
        raise exceptions.FailedExecution(
            'Unable to get information about snapshot {0} from repository: '
            '{1}. Error: {2}'.format(snapname, repository, err)
        )

def get_snapshot_data(client, repository=None):
    """
    Get ``_all`` snapshots from repository and return a list.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :rtype: list
    """
    if not repository:
        raise exceptions.MissingArgument('No value for "repository" provided')
    try:
        return client.snapshot.get(repository=repository, snapshot="_all")['snapshots']
    except (elasticsearch7.TransportError, elasticsearch7.NotFoundError) as err:
        raise exceptions.FailedExecution(
            'Unable to get snapshot information from repository: '
            '{0}. Error: {1}'.format(repository, err)
        )

def snapshot_in_progress(client, repository=None, snapshot=None):
    """
    Determine whether the provided snapshot in `repository` is ``IN_PROGRESS``.
    If no value is provided for `snapshot`, then check all of them.
    Return `snapshot` if it is found to be in progress, or `False`

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :arg snapshot: The snapshot name
    """
    allsnaps = get_snapshot_data(client, repository=repository)
    inprogress = (
        [snap['snapshot'] for snap in allsnaps if 'state' in snap.keys()
         and snap['state'] == 'IN_PROGRESS']
    )
    if snapshot:
        retval = snapshot if snapshot in inprogress else False
    else:
        if not inprogress:
            retval = False
        elif len(inprogress) == 1:
            retval = inprogress[0]
        else:  # This should not be possible
            raise exceptions.CuratorException(
                'More than 1 snapshot in progress: {0}'.format(inprogress)
            )
    return retval

def find_snapshot_tasks(client):
    """
    Check if there is snapshot activity in the Tasks API.
    Return `True` if activity is found, or `False`

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :rtype: bool
    """
    retval = False
    tasklist = client.tasks.list()
    for node in tasklist['nodes']:
        for task in tasklist['nodes'][node]['tasks']:
            activity = tasklist['nodes'][node]['tasks'][task]['action']
            if 'snapshot' in activity:
                LOGGER.debug('Snapshot activity detected: {0}'.format(activity))
                retval = True
    return retval

def safe_to_snap(client, repository=None, retry_interval=120, retry_count=3):
    """
    Ensure there are no snapshots in progress. Pause and retry accordingly

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :arg retry_interval: Number of seconds to delay between retries. Default:
        120 (seconds)
    :arg retry_count: Number of attempts to make. Default: 3
    :rtype: bool
    """
    if not repository:
        raise exceptions.MissingArgument('No value for "repository" provided')
    for count in range(1, retry_count+1):
        in_progress = snapshot_in_progress(
            client, repository=repository
        )
        ongoing_task = find_snapshot_tasks(client)
        if in_progress or ongoing_task:
            if in_progress:
                LOGGER.info(
                    'Snapshot already in progress: {0}'.format(in_progress))
            elif ongoing_task:
                LOGGER.info('Snapshot activity detected in Tasks API')
            LOGGER.info(
                'Pausing {0} seconds before retrying...'.format(retry_interval))
            time.sleep(retry_interval)
            LOGGER.info('Retry {0} of {1}'.format(count, retry_count))
        else:
            return True
    return False

def create_snapshot_body(indices, ignore_unavailable=False,
                         include_global_state=True, partial=False):
    """
    Create the request body for creating a snapshot from the provided
    arguments.

    :arg indices: A single index, or list of indices to snapshot.
    :arg ignore_unavailable: Ignore unavailable shards/indices. (default:
        `False`)
    :type ignore_unavailable: bool
    :arg include_global_state: Store cluster global state with snapshot.
        (default: `True`)
    :type include_global_state: bool
    :arg partial: Do not fail if primary shard is unavailable. (default:
        `False`)
    :type partial: bool
    :rtype: dict
    """
    if not indices:
        LOGGER.error('No indices provided.')
        return False
    body = {
        "ignore_unavailable": ignore_unavailable,
        "include_global_state": include_global_state,
        "partial": partial,
    }
    if indices == '_all':
        body["indices"] = indices
    else:
        body["indices"] = to_csv(indices)
    return body
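# Illustration (not part of the original module): index lists are collapsed
# to a sorted csv string in the request body:
#
#   >>> create_snapshot_body(['web-2', 'web-1'])
#   {'ignore_unavailable': False, 'include_global_state': True, 'partial': False, 'indices': 'web-1,web-2'}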

def create_repo_body(repo_type=None,
                     compress=True, chunk_size=None,
                     max_restore_bytes_per_sec=None,
                     max_snapshot_bytes_per_sec=None,
                     location=None,
                     bucket=None, region=None, base_path=None, access_key=None,
                     secret_key=None,
                     role_arn=None, **kwargs):
    """
    Build the 'body' portion for use in creating a repository.

    :arg repo_type: The type of repository (presently only `fs` and `s3`)
    :arg compress: Turn on compression of the snapshot files. Compression is
        applied only to metadata files (index mapping and settings). Data files
        are not compressed. (Default: `True`)
    :arg chunk_size: The chunk size can be specified in bytes or by using size
        value notation, i.e. 1g, 10m, 5k. Defaults to `null` (unlimited chunk
        size).
    :arg max_restore_bytes_per_sec: Throttles per node restore rate. Defaults
        to ``20mb`` per second.
    :arg max_snapshot_bytes_per_sec: Throttles per node snapshot rate. Defaults
        to ``20mb`` per second.
    :arg location: Location of the snapshots. Required.
    :arg bucket: `S3 only.` The name of the bucket to be used for snapshots.
        Required.
    :arg region: `S3 only.` The region where the bucket is located. Defaults to
        `US Standard`
    :arg base_path: `S3 only.` Specifies the path within the bucket to repository
        data. Defaults to the value of ``repositories.s3.base_path`` or to the
        root directory if not set.
    :arg access_key: `S3 only.` The access key to use for authentication.
        Defaults to the value of ``cloud.aws.access_key``.
    :arg secret_key: `S3 only.` The secret key to use for authentication.
        Defaults to the value of ``cloud.aws.secret_key``.
    :arg role_arn: `S3 only.` The role arn for snapshot registration.
        Required.

    :returns: A dictionary suitable for creating a repository from the provided
        arguments.
    :rtype: dict
    """
    # This shouldn't happen, but just in case...
    if not repo_type:
        raise exceptions.MissingArgument('Missing required parameter --repo_type')

    argdict = locals()
    body = {}
    body['type'] = argdict['repo_type']
    body['settings'] = {}
    settingz = []  # Differentiate from module settings
    maybes = [
        'compress', 'chunk_size', 'max_restore_bytes_per_sec', 'max_snapshot_bytes_per_sec']
    s3args = ['bucket', 'region', 'base_path', 'access_key', 'secret_key', 'role_arn']

    settingz += [i for i in maybes if argdict[i]]
    # Type 'fs'
    if argdict['repo_type'] == 'fs':
        settingz.append('location')
    # Type 's3'
    if argdict['repo_type'] == 's3':
        settingz += [i for i in s3args if argdict[i]]
    for k in settingz:
        body['settings'][k] = argdict[k]
    return body

def create_repository(client, **kwargs):
    """
    Create a repository with the provided repository and body settings.

    :arg client: An :class:`elasticsearch7.Elasticsearch` client object
    :arg repository: The Elasticsearch snapshot repository to use
    :arg repo_type: The type of repository (presently only `fs` and `s3`)
    :arg compress: Turn on compression of the snapshot files. Compression is
        applied only to metadata files (index mapping and settings). Data files
        are not compressed. (Default: `True`)
    :arg chunk_size: The chunk size can be specified in bytes or by using size
        value notation, i.e. 1g, 10m, 5k. Defaults to `null` (unlimited chunk
        size).
    :arg max_restore_bytes_per_sec: Throttles per node restore rate. Defaults
        to ``20mb`` per second.
    :arg max_snapshot_bytes_per_sec: Throttles per node snapshot rate. Defaults
        to ``20mb`` per second.
    :arg location: Location of the snapshots. Required.
    :arg bucket: `S3 only.` The name of the bucket to be used for snapshots.
        Required.
    :arg region: `S3 only.` The region where the bucket is located. Defaults to
        `US Standard`
    :arg base_path: `S3 only.` Specifies the path within the bucket to repository
        data. Defaults to the value of ``repositories.s3.base_path`` or to the
        root directory if not set.
    :arg access_key: `S3 only.` The access key to use for authentication.
        Defaults to the value of ``cloud.aws.access_key``.
    :arg secret_key: `S3 only.` The secret key to use for authentication.
        Defaults to the value of ``cloud.aws.secret_key``.
    :arg skip_repo_fs_check: Skip verifying the repo after creation.

    :returns: A boolean value indicating success or failure.
    :rtype: bool
    """
    if 'repository' not in kwargs:
        raise exceptions.MissingArgument('Missing required parameter "repository"')
    else:
        repository = kwargs['repository']
    skip_repo_fs_check = kwargs.pop('skip_repo_fs_check', False)
    params = {'verify': 'false' if skip_repo_fs_check else 'true'}

    try:
        body = create_repo_body(**kwargs)
        LOGGER.debug('Checking if repository {0} already exists...'.format(repository))
        result = repository_exists(client, repository=repository)
        LOGGER.debug('Result = {0}'.format(result))
        if not result:
            LOGGER.debug('Repository {0} not in Elasticsearch. Continuing...'.format(repository))
            client.snapshot.create_repository(repository=repository, body=body, params=params)
        else:
            raise exceptions.FailedExecution(
                'Unable to create repository {0}. '
                'A repository with that name already exists.'.format(repository)
            )
    except elasticsearch7.TransportError as err:
        raise exceptions.FailedExecution(
            'Unable to create repository {0}. Response Code: {1}. Error: {2}. '
            'Check curator and elasticsearch logs for more information.'.format(
                repository, err.status_code, err.error)
        )
    LOGGER.debug("Repository {0} creation initiated...".format(repository))
    return True

1398def repository_exists(client, repository=None): 

1399 """ 

1400 Verify the existence of a repository 

1401 

1402 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1403 :arg repository: The Elasticsearch snapshot repository to use 

1404 :rtype: bool 

1405 """ 

1406 if not repository: 

1407 raise exceptions.MissingArgument('No value for "repository" provided') 

1408 try: 

1409 test_result = get_repository(client, repository) 

1410 if repository in test_result: 

1411 LOGGER.debug("Repository {0} exists.".format(repository)) 

1412 response = True 

1413 else: 

1414 LOGGER.debug("Repository {0} not found...".format(repository)) 

1415 response = False 

1416 except Exception as err: 

1417 LOGGER.debug('Unable to find repository "{0}": Error: {1}'.format(repository, err)) 

1418 response = False 

1419 return response 

1420 

1421def test_repo_fs(client, repository=None): 

1422 """ 

1423 Test whether all nodes have write access to the repository 

1424 

1425 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1426 :arg repository: The Elasticsearch snapshot repository to use 

1427 """ 

1428 try: 

1429 nodes = client.snapshot.verify_repository( 

1430 repository=repository)['nodes'] 

1431 LOGGER.debug('All nodes can write to the repository') 

1432 LOGGER.debug('Nodes with verified repository access: {0}'.format(nodes)) 

1433 except Exception as err: 

1434 try: 

1435 if err.status_code == 404: 

1436 msg = ( 

1437 '--- Repository "{0}" not found. Error: ' 

1438 '{1}, {2}'.format(repository, err.status_code, err.error) 

1439 ) 

1440 else: 

1441 msg = ( 

1442 '--- Got a {0} response from Elasticsearch. ' 

1443 'Error message: {1}'.format(err.status_code, err.error) 

1444 ) 

1445 except AttributeError: 

1446 msg = ('--- Error message: {0}'.format(err)) 

1447 raise exceptions.ActionError( 

1448 'Failed to verify all nodes have repository access: {0}'.format(msg) 

1449 ) 

1450 

1451def snapshot_running(client): 

1452 """ 

1453 Return `True` if a snapshot is in progress, and `False` if not 

1454 

1455 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1456 :rtype: bool 

1457 """ 

1458 try: 

1459 status = client.snapshot.status()['snapshots'] 

1460 except Exception as err: 

1461 report_failure(err) 

1462 # We will only accept a positively identified False. Anything else is 

1463 # suspect. 

1464 return False if not status else True 

1465 
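
A common use of snapshot_running is as a guard before starting a new snapshot. A sketch (assuming `client` is a connected elasticsearch7.Elasticsearch instance, and that raising curator's SnapshotInProgress is the desired failure mode):

    from curator import utils, exceptions

    def ensure_no_snapshot_in_progress(client):
        """Raise if another snapshot is already running."""
        if utils.snapshot_running(client):
            raise exceptions.SnapshotInProgress(
                'Snapshot already in progress. Cannot start a new one.')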

1466def parse_date_pattern(name): 

1467 """ 

1468 Scan and parse `name` for :py:func:`time.strftime` strings, replacing them 

1469 with the associated value when found, but otherwise returning lowercase 

1470 values, as uppercase snapshot names are not allowed. It will detect if the 

1471 first character is a `<`, which would indicate `name` is going to be using 

1472 Elasticsearch date math syntax, and skip accordingly. 

1473 

1474 The :py:func:`time.strftime` identifiers that Curator currently recognizes 

1475 as acceptable include: 

1476 

1477 * ``Y``: A 4 digit year 

1478 * ``y``: A 2 digit year 

1479 * ``m``: The 2 digit month 

1480 * ``W``: The 2 digit week of the year 

1481 * ``d``: The 2 digit day of the month 

1482 * ``H``: The 2 digit hour of the day, in 24 hour notation 

1483 * ``M``: The 2 digit minute of the hour 

1484 * ``S``: The 2 digit second of the minute 

1485 * ``j``: The 3 digit day of the year 

1486 

1487 :arg name: A name, which can contain :py:func:`time.strftime` 

1488 strings 

1489 """ 

1490 prev, rendered = ('', '') 

1491 LOGGER.debug('Provided index name: {0}'.format(name)) 

1492 for idx, char in enumerate(name): 

1493 LOGGER.debug('Current character in provided name: {0}, position: {1}'.format(char, idx)) 

1494 if char == '<': 

1495 LOGGER.info('"{0}" is probably using Elasticsearch date math.'.format(name)) 

1496 rendered = name 

1497 break 

1498 if char == '%': 

1499 pass 

1500 elif char in settings.date_regex() and prev == '%': 

1501 rendered += str(datetime.utcnow().strftime('%{0}'.format(char))) 

1502 else: 

1503 rendered += char 

1504 LOGGER.debug('Partially rendered name: {0}'.format(rendered)) 

1505 prev = char 

1506 LOGGER.debug('Fully rendered name: {0}'.format(rendered)) 

1507 return rendered 

1508 
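
For example, with datetime.utcnow() falling on 2023-08-16 (the rendered values track whatever the current UTC time actually is):

    from curator.utils import parse_date_pattern

    parse_date_pattern('curator-%Y.%m.%d')   # -> 'curator-2023.08.16'
    parse_date_pattern('<curator-{now/d}>')  # returned as-is: Elasticsearch date math
    parse_date_pattern('no-patterns-here')   # -> 'no-patterns-here'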

1509def prune_nones(mydict): 

1510 """ 

1511 Remove keys from `mydict` whose values are `None` 

1512 

1513 :arg mydict: The dictionary to act on 

1514 :rtype: dict 

1515 """ 

1516 # Test for `None` (and the string 'None') explicitly, or falsey values like 0 would be pruned as well 

1517 return dict([(k, v) for k, v in mydict.items() if v is not None and v != 'None']) 

1518 
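
For instance (note that both None and the string 'None' are pruned, while falsey values such as 0 or '' are kept):

    from curator.utils import prune_nones

    prune_nones({'a': 1, 'b': None, 'c': 'None', 'd': 0})
    # -> {'a': 1, 'd': 0}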

1519def validate_filters(action, filters): 

1520 """ 

1521 Validate that the filters are appropriate for the action type, e.g. no 

1522 index filters applied to a snapshot list. 

1523 

1524 :arg action: An action name 

1525 :arg filters: A list of filters to test. 

1526 """ 

1527 # Define which set of filtertypes to use for testing 

1528 if action in settings.snapshot_actions(): 

1529 filtertypes = settings.snapshot_filtertypes() 

1530 else: 

1531 filtertypes = settings.index_filtertypes() 

1532 for fil in filters: 

1533 if fil['filtertype'] not in filtertypes: 

1534 raise exceptions.ConfigurationError( 

1535 '"{0}" filtertype is not compatible with action "{1}"'.format( 

1536 fil['filtertype'], 

1537 action 

1538 ) 

1539 ) 

1540 # If we get to this point, we're still valid. Return the original list 

1541 return filters 

1542 
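
A sketch of the behavior: a filtertype valid for index actions but absent from the snapshot filtertype list raises ConfigurationError when paired with a snapshot action (the specific filtertype names here assume Curator's stock settings lists):

    from curator.utils import validate_filters
    from curator import exceptions

    # 'age' is a valid filtertype for snapshot actions...
    validate_filters('delete_snapshots', [{'filtertype': 'age'}])

    # ...but an index-only filtertype such as 'kibana' is not.
    try:
        validate_filters('delete_snapshots', [{'filtertype': 'kibana'}])
    except exceptions.ConfigurationError as err:
        print(err)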

1543def validate_actions(data): 

1544 """ 

1545 Validate an Action configuration dictionary, as imported from actions.yml, 

1546 for example. 

1547 

1548 The method returns a validated and sanitized configuration dictionary. 

1549 

1550 :arg data: The configuration dictionary 

1551 :rtype: dict 

1552 """ 

1553 # data is the ENTIRE schema... 

1554 clean_config = {} 

1555 # Let's break it down into smaller chunks... 

1556 # First, let's make sure it has "actions" as a key, with a subdictionary 

1557 root = SchemaCheck(data, actions.root(), 'Actions File', 'root').result() 

1558 # We've passed the first step. Now let's iterate over the actions... 

1559 for action_id in root['actions']: 

1560 # Now, let's ensure that the basic action structure is correct, with 

1561 # the proper possibilities for 'action' 

1562 action_dict = root['actions'][action_id] 

1563 loc = 'Action ID "{0}"'.format(action_id) 

1564 valid_structure = SchemaCheck( 

1565 action_dict, 

1566 actions.structure(action_dict, loc), 

1567 'structure', 

1568 loc 

1569 ).result() 

1570 # With the basic structure validated, now we extract the action name 

1571 current_action = valid_structure['action'] 

1572 # And let's update the location with the action. 

1573 loc = 'Action ID "{0}", action "{1}"'.format( 

1574 action_id, current_action) 

1575 clean_options = SchemaCheck( 

1576 prune_nones(valid_structure['options']), 

1577 options.get_schema(current_action), 

1578 'options', 

1579 loc 

1580 ).result() 

1581 clean_config[action_id] = { 

1582 'action' : current_action, 

1583 'description' : valid_structure['description'], 

1584 'options' : clean_options, 

1585 } 

1586 if current_action == 'alias': 

1587 add_remove = {} 

1588 for k in ['add', 'remove']: 

1589 if k in valid_structure: 

1590 current_filters = SchemaCheck( 

1591 valid_structure[k]['filters'], 

1592 Schema(filters.Filters(current_action, location=loc)), 

1593 '"{0}" filters'.format(k), 

1594 '{0}, "filters"'.format(loc) 

1595 ).result() 

1596 add_remove.update( 

1597 { 

1598 k: { 

1599 'filters' : SchemaCheck( 

1600 current_filters, 

1601 Schema(filters.Filters(current_action, location=loc)), 

1602 'filters', 

1603 '{0}, "{1}", "filters"'.format(loc, k) 

1604 ).result() 

1605 } 

1606 } 

1607 ) 

1608 # Add/Remove here 

1609 clean_config[action_id].update(add_remove) 

1610 elif current_action in ['cluster_routing', 'create_index', 'rollover']: 

1611 # cluster_routing, create_index, and rollover take no filters 

1612 pass 

1613 else: # Filters key only appears in non-alias actions 

1614 valid_filters = SchemaCheck( 

1615 valid_structure['filters'], 

1616 Schema(filters.Filters(current_action, location=loc)), 

1617 'filters', 

1618 '{0}, "filters"'.format(loc) 

1619 ).result() 

1620 clean_filters = validate_filters(current_action, valid_filters) 

1621 clean_config[action_id].update({'filters' : clean_filters}) 

1622 # This is a special case for remote reindex 

1623 if current_action == 'reindex': 

1624 # Check only if populated with something. 

1625 if 'remote_filters' in valid_structure['options']: 

1626 valid_filters = SchemaCheck( 

1627 valid_structure['options']['remote_filters'], 

1628 Schema(filters.Filters(current_action, location=loc)), 

1629 'filters', 

1630 '{0}, "filters"'.format(loc) 

1631 ).result() 

1632 clean_remote_filters = validate_filters(current_action, valid_filters) 

1633 clean_config[action_id]['options'].update({'remote_filters': clean_remote_filters}) 

1634 

1635 # if we've gotten this far without any Exceptions raised, it's valid! 

1636 return {'actions': clean_config} 

1637 
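
A minimal actions dictionary that should pass this validation, mirroring the structure Curator reads from actions.yml (a sketch; the option and filter values are illustrative):

    from curator.utils import validate_actions

    config = {
        'actions': {
            1: {
                'action': 'delete_indices',
                'description': 'Delete indices older than 30 days',
                'options': {'ignore_empty_list': True},
                'filters': [
                    {
                        'filtertype': 'age',
                        'source': 'creation_date',
                        'direction': 'older',
                        'unit': 'days',
                        'unit_count': 30,
                    }
                ],
            }
        }
    }
    clean = validate_actions(config)  # returns {'actions': <sanitized config>}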

1638def health_check(client, **kwargs): 

1639 """ 

1640 This function calls client.cluster.health and, based on the args provided, 

1641 will return `True` or `False` depending on whether that particular keyword 

1642 appears in the output, and has the expected value. 

1643 If multiple keys are provided, all must match for a `True` response. 

1644 

1645 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1646 """ 

1647 LOGGER.debug('KWARGS= "{0}"'.format(kwargs)) 

1648 klist = list(kwargs.keys()) 

1649 if not klist: 

1650 raise exceptions.MissingArgument('Must provide at least one keyword argument') 

1651 hc_data = client.cluster.health() 

1652 response = True 

1653 

1654 for k in klist: 

1655 # First, verify that all kwargs are in the list 

1656 if k not in hc_data: 

1657 raise exceptions.ConfigurationError('Key "{0}" not in cluster health output'.format(k)) 

1658 if hc_data[k] != kwargs[k]: 

1659 LOGGER.debug( 

1660 'NO MATCH: Value for key "{0}", health check data: ' 

1661 '{1}'.format(k, hc_data[k]) 

1662 ) 

1663 response = False 

1664 else: 

1665 LOGGER.debug( 

1666 'MATCH: Value for key "{0}", health check data: ' 

1667 '{1}'.format(k, hc_data[k]) 

1668 ) 

1669 if response: 

1670 LOGGER.info('Health Check for all provided keys passed.') 

1671 return response 

1672 
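
For example, to require a green cluster with no relocating shards (a sketch; `client` is assumed to be a connected elasticsearch7.Elasticsearch instance):

    from curator.utils import health_check

    if health_check(client, status='green', relocating_shards=0):
        print('Cluster is green and stable.')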

1673def snapshot_check(client, snapshot=None, repository=None): 

1674 """ 

1675 This function calls `client.snapshot.get` and tests to see whether the 

1676 snapshot is complete, and if so, with what status. It will log errors 

1677 according to the result. If the snapshot is still `IN_PROGRESS`, it will 

1678 return `False`. `SUCCESS` will be an `INFO` level message, `PARTIAL` nets 

1679 a `WARNING` message, `FAILED` is an `ERROR` message, and all others will be 

1680 a `WARNING` level message. 

1681 

1682 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1683 :arg snapshot: The name of the snapshot. 

1684 :arg repository: The Elasticsearch snapshot repository to use 

1685 """ 

1686 try: 

1687 state = client.snapshot.get( 

1688 repository=repository, snapshot=snapshot)['snapshots'][0]['state'] 

1689 except Exception as err: 

1690 raise exceptions.CuratorException( 

1691 'Unable to obtain information for snapshot "{0}" in repository ' 

1692 '"{1}". Error: {2}'.format(snapshot, repository, err) 

1693 ) 

1694 LOGGER.debug('Snapshot state = {0}'.format(state)) 

1695 if state == 'IN_PROGRESS': 

1696 LOGGER.info('Snapshot {0} still in progress.'.format(snapshot)) 

1697 return False 

1698 elif state == 'SUCCESS': 

1699 LOGGER.info( 

1700 'Snapshot {0} successfully completed.'.format(snapshot)) 

1701 elif state == 'PARTIAL': 

1702 LOGGER.warning( 

1703 'Snapshot {0} completed with state PARTIAL.'.format(snapshot)) 

1704 elif state == 'FAILED': 

1705 LOGGER.error( 

1706 'Snapshot {0} completed with state FAILED.'.format(snapshot)) 

1707 else: 

1708 LOGGER.warning( 

1709 'Snapshot {0} completed with state: {1}'.format(snapshot, state)) 

1710 return True 

1711 

1712def relocate_check(client, index): 

1713 """ 

1714 This function calls client.cluster.state with a given index to check if 

1715 all of the shards for that index are in the STARTED state. It will 

1716 return `True` if all shards both primary and replica are in the STARTED 

1717 state, and it will return `False` if any primary or replica shard is in 

1718 a different state. 

1719 

1720 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1721 :arg index: The index to check the index shards state. 

1722 """ 

1723 shard_state_data = ( 

1724 client.cluster.state(index=index)['routing_table']['indices'][index]['shards'] 

1725 ) 

1726 finished_state = ( 

1727 all( 

1728 all( 

1729 shard['state'] == "STARTED" for shard in shards 

1730 ) 

1731 for shards in shard_state_data.values() 

1732 ) 

1733 ) 

1734 if finished_state: 

1735 LOGGER.info('Relocate Check for index: "{0}" has passed.'.format(index)) 

1736 return finished_state 

1737 

1738 

1739def restore_check(client, index_list): 

1740 """ 

1741 This function calls client.indices.recovery with the list of indices to 

1742 check for complete recovery. It will return `True` if recovery of those 

1743 indices is complete, and `False` otherwise. It is designed to fail fast: 

1744 if a single shard is encountered that is still recovering (not in `DONE` 

1745 stage), it will immediately return `False`, rather than complete iterating 

1746 over the rest of the response. 

1747 

1748 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1749 :arg index_list: The list of indices to verify having been restored. 

1750 """ 

1751 response = {} 

1752 for chunk in chunk_index_list(index_list): 

1753 try: 

1754 chunk_response = client.indices.recovery(index=chunk, human=True) 

1755 except Exception as err: 

1756 raise exceptions.CuratorException( 

1757 'Unable to obtain recovery information for specified indices. ' 

1758 'Error: {0}'.format(err) 

1759 ) 

1760 # This should address #962, where perhaps the cluster state hasn't yet 

1761 # had a chance to add a _recovery state yet, so it comes back empty. 

1762 if chunk_response == {}: 

1763 LOGGER.info('_recovery returned an empty response. Trying again.') 

1764 return False 

1765 response.update(chunk_response) 

1766 # Fixes added in #989 

1767 LOGGER.info('Provided indices: {0}'.format(index_list)) 

1768 LOGGER.info('Found indices: {0}'.format(list(response.keys()))) 

1769 for index in response: 

1770 for shard in range(0, len(response[index]['shards'])): 

1771 # `is not` tests object identity, not equality, so comparing 

1772 # string values here requires `!=`. Using != fixes #966 

1773 if response[index]['shards'][shard]['stage'] != 'DONE': 

1774 LOGGER.info( 

1775 'Index "{0}" is still in stage "{1}"'.format( 

1776 index, response[index]['shards'][shard]['stage'] 

1777 ) 

1778 ) 

1779 return False 

1780 # If we've gotten here, all of the indices have recovered 

1781 return True 

1782 

1783 

1784def task_check(client, task_id=None): 

1785 """ 

1786 This function calls client.tasks.get with the provided `task_id`. If the 

1787 task data contains ``'completed': True``, then it will return `True` 

1788 If the task is not completed, it will log some information about the task 

1789 and return `False` 

1790 

1791 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1792 :arg task_id: A task_id which ostensibly matches a task searchable in the 

1793 tasks API. 

1794 """ 

1795 try: 

1796 task_data = client.tasks.get(task_id=task_id) 

1797 except Exception as err: 

1798 raise exceptions.CuratorException( 

1799 'Unable to obtain task information for task_id "{0}". Exception ' 

1800 '{1}'.format(task_id, err) 

1801 ) 

1802 task = task_data['task'] 

1803 completed = task_data['completed'] 

1804 if task['action'] == 'indices:data/write/reindex': 

1805 LOGGER.debug('It\'s a REINDEX TASK') 

1806 LOGGER.debug('TASK_DATA: {0}'.format(task_data)) 

1807 LOGGER.debug('TASK_DATA keys: {0}'.format(list(task_data.keys()))) 

1808 if 'response' in task_data: 

1809 response = task_data['response'] 

1810 if response['failures']: 

1811 raise exceptions.FailedReindex( 

1812 'Failures found in reindex response: {0}'.format(response['failures']) 

1813 ) 

1814 running_time = 0.000000001 * task['running_time_in_nanos'] 

1815 LOGGER.debug('Running time: {0} seconds'.format(running_time)) 

1816 descr = task['description'] 

1817 

1818 if completed: 

1819 completion_time = ((running_time * 1000) + task['start_time_in_millis']) 

1820 time_string = time.strftime( 

1821 '%Y-%m-%dT%H:%M:%SZ', time.localtime(completion_time/1000) 

1822 ) 

1823 LOGGER.info('Task "{0}" completed at {1}.'.format(descr, time_string)) 

1824 return True 

1825 else: 

1826 # Log the task status here. 

1827 LOGGER.debug('Full Task Data: {0}'.format(task_data)) 

1828 LOGGER.info( 

1829 'Task "{0}" with task_id "{1}" has been running for ' 

1830 '{2} seconds'.format(descr, task_id, running_time)) 

1831 return False 

1832 

1833 

1834def wait_for_it( 

1835 client, action, task_id=None, snapshot=None, repository=None, 

1836 index=None, index_list=None, wait_interval=9, max_wait=-1 

1837 ): 

1838 """ 

1839 This function consolidates all wait_for_completion-style polling behaviors in one place 

1840 

1841 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1842 :arg action: The action name that will identify how to wait 

1843 :arg task_id: If the action provided a task_id, this is where it must be 

1844 declared. 

1845 :arg snapshot: The name of the snapshot. 

1846 :arg repository: The Elasticsearch snapshot repository to use 

1847 :arg wait_interval: How frequently the specified "wait" behavior will be 

1848 polled to check for completion. 

1849 :arg max_wait: Maximum number of seconds the "wait" behavior will persist 

1850 before giving up and raising an Exception. The default is -1, meaning 

1851 it will try forever. 

1852 """ 

1853 action_map = { 

1854 'allocation':{ 

1855 'function': health_check, 

1856 'args': {'relocating_shards':0}, 

1857 }, 

1858 'replicas':{ 

1859 'function': health_check, 

1860 'args': {'status':'green'}, 

1861 }, 

1862 'cluster_routing':{ 

1863 'function': health_check, 

1864 'args': {'relocating_shards':0}, 

1865 }, 

1866 'snapshot':{ 

1867 'function':snapshot_check, 

1868 'args':{'snapshot':snapshot, 'repository':repository}, 

1869 }, 

1870 'restore':{ 

1871 'function':restore_check, 

1872 'args':{'index_list':index_list}, 

1873 }, 

1874 'reindex':{ 

1875 'function':task_check, 

1876 'args':{'task_id':task_id}, 

1877 }, 

1878 'shrink':{ 

1879 'function': health_check, 

1880 'args': {'status':'green'}, 

1881 }, 

1882 'relocate':{ 

1883 'function': relocate_check, 

1884 'args': {'index':index} 

1885 }, 

1886 } 

1887 wait_actions = list(action_map.keys()) 

1888 

1889 if action not in wait_actions: 

1890 raise exceptions.ConfigurationError( 

1891 '"action" must be one of {0}'.format(wait_actions) 

1892 ) 

1893 if action == 'reindex' and task_id is None: 

1894 raise exceptions.MissingArgument( 

1895 'A task_id must accompany "action" {0}'.format(action) 

1896 ) 

1897 if action == 'snapshot' and ((snapshot is None) or (repository is None)): 

1898 raise exceptions.MissingArgument( 

1899 'A snapshot and repository must accompany "action" {0}. snapshot: ' 

1900 '{1}, repository: {2}'.format(action, snapshot, repository) 

1901 ) 

1902 if action == 'restore' and index_list is None: 

1903 raise exceptions.MissingArgument( 

1904 'An index_list must accompany "action" {0}'.format(action) 

1905 ) 

1906 elif action == 'reindex': 

1907 try: 

1908 _ = client.tasks.get(task_id=task_id) 

1909 except Exception as err: 

1910 # This exception should only exist in API usage. It should never 

1911 # occur in regular Curator usage. 

1912 raise exceptions.CuratorException( 

1913 'Unable to find task_id {0}. Exception: {1}'.format(task_id, err) 

1914 ) 

1915 

1916 # Now with this mapped, we can perform the wait as indicated. 

1917 start_time = datetime.now() 

1918 result = False 

1919 while True: 

1920 elapsed = int((datetime.now() - start_time).total_seconds()) 

1921 LOGGER.debug('Elapsed time: {0} seconds'.format(elapsed)) 

1922 response = action_map[action]['function']( 

1923 client, **action_map[action]['args']) 

1924 LOGGER.debug('Response: {0}'.format(response)) 

1925 # Success 

1926 if response: 

1927 LOGGER.debug( 

1928 'Action "{0}" finished executing (may or may not have been ' 

1929 'successful)'.format(action)) 

1930 result = True 

1931 break 

1932 # Not success, and reached maximum wait (if defined) 

1933 elif (max_wait != -1) and (elapsed >= max_wait): 

1934 LOGGER.error( 

1935 'Unable to complete action "{0}" within max_wait ({1}) ' 

1936 'seconds.'.format(action, max_wait) 

1937 ) 

1938 break 

1939 # Not success, so we wait. 

1940 else: 

1941 LOGGER.debug( 

1942 'Action "{0}" not yet complete, {1} total seconds elapsed. ' 

1943 'Waiting {2} seconds before checking ' 

1944 'again.'.format(action, elapsed, wait_interval)) 

1945 time.sleep(wait_interval) 

1946 

1947 LOGGER.debug('Result: {0}'.format(result)) 

1948 if not result: 

1949 raise exceptions.ActionTimeout( 

1950 'Action "{0}" failed to complete in the max_wait period of ' 

1951 '{1} seconds'.format(action, max_wait) 

1952 ) 

1953 
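
Putting the pieces together, a snapshot action would typically start the snapshot without waiting and then poll via wait_for_it (a sketch; the snapshot, repository, and cluster address are illustrative):

    import elasticsearch7
    from curator.utils import wait_for_it

    client = elasticsearch7.Elasticsearch('http://localhost:9200')
    client.snapshot.create(
        repository='my_backups',
        snapshot='snap-2023.08.16',
        wait_for_completion=False,
    )
    # Poll snapshot_check every 9 seconds for at most an hour; raises
    # exceptions.ActionTimeout if the snapshot has not finished by then.
    wait_for_it(
        client, 'snapshot',
        snapshot='snap-2023.08.16', repository='my_backups',
        wait_interval=9, max_wait=3600,
    )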

1954def node_roles(client, node_id): 

1955 """ 

1956 Return the list of roles assigned to the node identified by ``node_id`` 

1957 

1958 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1959 :rtype: list 

1960 """ 

1961 return client.nodes.info()['nodes'][node_id]['roles'] 

1962 

1963def index_size(client, idx, value='total'): 

1964 """ 

1965 Return the sum of either `primaries` or `total` shards for index ``idx`` 

1966 

1967 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1968 :arg idx: An Elasticsearch index 

1969 :arg value: One of either `primaries` or `total` 

1970 :rtype: int 

1971 """ 

1972 return client.indices.stats(index=idx)['indices'][idx][value]['store']['size_in_bytes'] 

1973 

1974def single_data_path(client, node_id): 

1975 """ 

1976 In order for a shrink to work, it should be on a single filesystem, as 

1977 shards cannot span filesystems. Return `True` if the node has a single 

1978 filesystem, and `False` otherwise. 

1979 

1980 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1981 :rtype: bool 

1982 """ 

1983 return len(client.nodes.stats()['nodes'][node_id]['fs']['data']) == 1 

1984 

1985 

1986def name_to_node_id(client, name): 

1987 """ 

1988 Return the node_id of the node identified by ``name`` 

1989 

1990 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

1991 :rtype: str 

1992 """ 

1993 stats = client.nodes.stats() 

1994 for node in stats['nodes']: 

1995 if stats['nodes'][node]['name'] == name: 

1996 LOGGER.debug('Found node_id "{0}" for name "{1}".'.format(node, name)) 

1997 return node 

1998 LOGGER.error('No node_id found matching name: "{0}"'.format(name)) 

1999 return None 

2000 

2001def node_id_to_name(client, node_id): 

2002 """ 

2003 Return the name of the node identified by ``node_id`` 

2004 

2005 :arg client: An :class:`elasticsearch7.Elasticsearch` client object 

2006 :rtype: str 

2007 """ 

2008 stats = client.nodes.stats() 

2009 name = None 

2010 if node_id in stats['nodes']: 

2011 name = stats['nodes'][node_id]['name'] 

2012 else: 

2013 LOGGER.error('No node_id found matching: "{0}"'.format(node_id)) 

2014 LOGGER.debug('Name associated with node_id "{0}": {1}'.format(node_id, name)) 

2015 return name 

2016 
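
These lookups support actions like shrink, which must pick a suitable target node. A sketch (the node name is illustrative; `client` is assumed connected):

    from curator import utils

    node_id = utils.name_to_node_id(client, 'data-node-1')
    if node_id is not None:
        print(utils.node_roles(client, node_id))        # e.g. ['data', 'ingest']
        print(utils.single_data_path(client, node_id))  # True if one data path
        print(utils.node_id_to_name(client, node_id))   # round-trips to 'data-node-1'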

2017def get_datemath(client, datemath, random_element=None): 

2018 """ 

2019 Return the parsed index name from ``datemath`` 

2020 """ 

2021 if random_element is None: 

2022 random_prefix = ( 

2023 'curator_get_datemath_function_' + 

2024 ''.join(random.choice(string.ascii_lowercase) for _ in range(32)) 

2025 ) 

2026 else: 

2027 random_prefix = 'curator_get_datemath_function_' + random_element 

2028 datemath_dummy = '<{0}-{1}>'.format(random_prefix, datemath) 

2029 # We both want and expect a 404 here (NotFoundError), since we have 

2030 # created a 32 character random string to definitely be an unknown 

2031 # index name. 

2032 LOGGER.debug('Random datemath string for extraction: {0}'.format(datemath_dummy)) 

2033 try: 

2034 client.indices.get(index=datemath_dummy) 

2035 except elasticsearch7.exceptions.NotFoundError as err: 

2036 # This is the magic. Elasticsearch still gave us the formatted 

2037 # index name in the error results. 

2038 faux_index = err.info['error']['index'] 

2039 LOGGER.debug('Response index name for extraction: {0}'.format(faux_index)) 

2040 # Now we strip the random index prefix back out again 

2041 pattern = r'^{0}-(.*)$'.format(random_prefix) 

2042 regex = re.compile(pattern) 

2043 try: 

2044 # And return only the now-parsed date string 

2045 return regex.match(faux_index).group(1) 

2046 except AttributeError: 

2047 raise exceptions.ConfigurationError( 

2048 'The rendered index "{0}" does not contain a valid date pattern ' 

2049 'or has invalid index name characters.'.format(faux_index) 

2050 ) 

2051 

2052def isdatemath(data): 

2053 """Check if data is a datemath expression""" 

2054 initial_check = r'^(.).*(.)$' 

2055 regex = re.compile(initial_check) 

2056 opener = regex.match(data).group(1) 

2057 closer = regex.match(data).group(2) 

2058 LOGGER.debug('opener = {0}, closer = {1}'.format(opener, closer)) 

2059 if (opener == '<' and closer != '>') or (opener != '<' and closer == '>'): 

2060 raise exceptions.ConfigurationError('Incomplete datemath encapsulation in "< >"') 

2061 elif (opener != '<' and closer != '>'): 

2062 return False 

2063 return True 

2064 
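
For instance:

    from curator.utils import isdatemath

    isdatemath('<logstash-{now/d}>')   # True
    isdatemath('logstash-2023.08.16')  # False
    isdatemath('<logstash-{now/d}')    # raises ConfigurationError (unbalanced '< >')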

2065def parse_datemath(client, value): 

2066 """ 

2067 Check if ``value`` is datemath. 

2068 Parse it if it is. 

2069 Return the bare value otherwise. 

2070 """ 

2071 if not isdatemath(value): 

2072 return value 

2073 else: 

2074 LOGGER.debug('Properly encapsulated, proceeding to next evaluation...') 

2075 # Our pattern has 4 capture groups. 

2076 # 1. Everything after the initial '<' up to the first '{', which we call ``prefix`` 

2077 # 2. Everything between the outermost '{' and '}', which we call ``datemath`` 

2078 # 3. An optional inner '{' and '}' containing a date formatter and potentially a timezone. 

2079 # Captured as group 3, but unused. 

2080 # 4. Everything after the last '}' up to the closing '>' 

2081 pattern = r'^<([^\{\}]*)?(\{.*(\{.*\})?\})([^\{\}]*)?>$' 

2082 regex = re.compile(pattern) 

2083 try: 

2084 prefix = regex.match(value).group(1) or '' 

2085 datemath = regex.match(value).group(2) 

2086 # formatter = regex.match(value).group(3) or '' (captured, but unused) 

2087 suffix = regex.match(value).group(4) or '' 

2088 except AttributeError: 

2089 raise exceptions.ConfigurationError( 

2090 'Value "{0}" does not contain a valid datemath pattern.'.format(value)) 

2091 return '{0}{1}{2}'.format(prefix, get_datemath(client, datemath), suffix) 

2092 
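
End to end, the cluster renders the date math and the prefix and suffix are re-attached (a sketch; the rendered date follows the cluster's clock, shown here as 2023-08-16):

    from curator.utils import parse_datemath

    parse_datemath(client, '<curator-{now/d}-suffix>')
    # -> 'curator-2023.08.16-suffix'
    parse_datemath(client, 'plain-name')
    # -> 'plain-name' (not datemath, returned untouched)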

2093def get_write_index(client, alias): 

2094 """Find which index associated with an alias is the write index""" 

2095 try: 

2096 response = client.indices.get_alias(index=alias) 

2097 except Exception: 

2098 raise exceptions.CuratorException('Alias {0} not found'.format(alias)) 

2099 # If there is more than one index in the list, one of them must be the 

2100 # write index; otherwise the alias is one-to-many and rollover cannot work. 

2101 if len(list(response.keys())) > 1: 

2102 for index in list(response.keys()): 

2103 try: 

2104 if response[index]['aliases'][alias]['is_write_index']: 

2105 return index 

2106 except KeyError: 

2107 raise exceptions.FailedExecution( 

2108 'Invalid alias: is_write_index not found in 1 to many alias') 

2109 else: 

2110 # There's only one, so this is it 

2111 return list(response.keys())[0]
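
A usage sketch (the alias name is illustrative). With a single backing index, that index name is returned directly; with several, the one flagged is_write_index wins, and its absence raises FailedExecution:

    from curator.utils import get_write_index

    write_index = get_write_index(client, 'logs-alias')
    print('Rollover will target: {0}'.format(write_index))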