Coverage for curator/helpers/date_ops.py: 99%
269 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-20 21:00 -0600
1"""Curator date and time functions"""
2import logging
3import random
4import re
5import string
6import time
7from datetime import timedelta, datetime
8from elasticsearch8.exceptions import NotFoundError
9from curator.exceptions import ConfigurationError
10from curator.defaults.settings import date_regex
class TimestringSearch(object):
    """
    Reusable matcher that extracts a timestamp from a string, ``searchme``, with a regular
    expression that is compiled once from an ``strftime`` pattern rather than on every search.

    :param timestring: An ``strftime`` pattern
    :type timestring: :py:func:`~.time.strftime`
    """
    def __init__(self, timestring):
        date_pattern = get_date_regex(timestring)

        #: Object attribute. Compiled regular expression holding a single named group,
        #: ``date``, that wraps the output of :py:func:`get_date_regex` for ``timestring``.
        self.pattern = re.compile(f'(?P<date>{date_pattern})')
        #: Object attribute preserving param ``timestring``
        self.timestring = timestring

    def get_epoch(self, searchme):
        """
        :param searchme: A string to be searched with :py:attr:`pattern`, which is derived
            from :py:attr:`timestring`

        :returns: The epoch timestamp extracted from ``searchme`` by regex matching against
            :py:attr:`pattern`, or ``None`` when no non-empty ``date`` group is found
        :rtype: int
        """
        found = self.pattern.search(searchme)
        if not found:
            return None
        timestamp = found.group('date')
        if not timestamp:
            # An empty capture is treated the same as no match at all
            return None
        return datetime_to_epoch(get_datetime(timestamp, self.timestring))
def absolute_date_range(
        unit, date_from, date_to,
        date_from_format=None, date_to_format=None
    ):
    """
    Calculate a date range from an absolute start and an absolute end, both returned as epoch
    seconds. ``unit`` determines how far the range extends past the parsed ``date_to``: the
    end of the range is the last second of the ``unit`` that ``date_to`` falls in. For example,
    if ``unit`` is ``months``, and ``date_from`` and ``date_to`` are both ``2017.01``, then the
    entire month of January 2017 will be the absolute date range.

    :param unit: One of ``seconds``, ``minutes``, ``hours``, ``days``, ``weeks``, ``months``,
        or ``years``.
    :param date_from: The simplified date for the start of the range
    :param date_to: The simplified date for the end of the range.
    :param date_from_format: The :py:func:`~.time.strftime` string used to parse ``date_from``
    :param date_to_format: The :py:func:`~.time.strftime` string used to parse ``date_to``

    :type unit: str
    :type date_from: str
    :type date_to: str
    :type date_from_format: str
    :type date_to_format: str

    :returns: The epoch start time and end time of a date range
    :rtype: tuple
    :raises ConfigurationError: if ``unit`` is not acceptable, if either format is missing, or
        if either date cannot be parsed with its format
    """
    logger = logging.getLogger(__name__)
    acceptable_units = ['seconds', 'minutes', 'hours', 'days', 'weeks', 'months', 'years']
    if unit not in acceptable_units:
        raise ConfigurationError(f'"unit" must be one of: {acceptable_units}')
    if not (date_from_format and date_to_format):
        raise ConfigurationError('Must provide "date_from_format" and "date_to_format"')
    try:
        start_epoch = datetime_to_epoch(get_datetime(date_from, date_from_format))
        logger.debug('Start ISO8601 = %s', datetime.utcfromtimestamp(start_epoch).isoformat())
    except Exception as err:
        raise ConfigurationError(
            f'Unable to parse "date_from" {date_from} and "date_from_format" {date_from_format}. '
            f'Error: {err}'
        ) from err
    try:
        end_date = get_datetime(date_to, date_to_format)
    except Exception as err:
        raise ConfigurationError(
            f'Unable to parse "date_to" {date_to} and "date_to_format" {date_to_format}. '
            f'Error: {err}'
        ) from err
    # In every branch we compute the first second *after* the range (the exclusive boundary)
    # and step back one second afterwards to land on the last second inside the range.
    if unit == 'months':
        # Months vary in length, so roll the calendar forward to the 1st of the next month
        if end_date.month == 12:
            next_year, next_month = end_date.year + 1, 1
        else:
            next_year, next_month = end_date.year, end_date.month + 1
        boundary = datetime_to_epoch(datetime(next_year, next_month, 1, 0, 0, 0))
    elif unit == 'years':
        # Likewise for years: the boundary is January 1st of the following year
        boundary = datetime_to_epoch(datetime(end_date.year + 1, 1, 1, 0, 0, 0))
    else:
        # Fixed-length units. get_point_of_reference subtracts ``count`` units from the
        # epoch, so a count of -1 adds exactly one unit's worth of seconds.
        boundary = get_point_of_reference(unit, -1, epoch=datetime_to_epoch(end_date))
    end_epoch = boundary - 1

    logger.debug('End ISO8601 = %s', datetime.utcfromtimestamp(end_epoch).isoformat())
    return (start_epoch, end_epoch)
def _shift_months(year, month, offset):
    """Return the ``(year, month)`` lying ``offset`` calendar months after ``year``/``month``."""
    shifted_year, month_index = divmod(year * 12 + (month - 1) + offset, 12)
    return shifted_year, month_index + 1


def date_range(unit, range_from, range_to, epoch=None, week_starts_on='sunday'):
    """
    Calculate a date range, as a start and an end epoch timestamp, expressed in counts of
    ``unit`` relative to a point of reference. The point of reference is ``epoch`` when it is
    set (and non-zero), otherwise the time at execution.

    If ``unit`` is ``weeks``, you can also determine when a week begins using ``week_starts_on``,
    which can be either ``sunday`` or ``monday``. Any value other than ``sunday`` (compared
    case-insensitively) is treated as ``monday``.

    :param unit: One of ``hours``, ``days``, ``weeks``, ``months``, or ``years``.
    :param range_from: Count of ``unit`` in the past/future is the origin?
    :param range_to: Count of ``unit`` in the past/future is the end point?
    :param epoch: An epoch timestamp used to establish a point of reference for calculations.
    :param week_starts_on: Either ``sunday`` or ``monday``. Default is ``sunday``

    :type unit: str
    :type range_from: int
    :type range_to: int
    :type epoch: int
    :type week_starts_on: str

    :returns: The epoch start time and end time of a date range
    :rtype: tuple
    :raises ConfigurationError: if ``unit`` is not acceptable or ``range_to`` is less than
        ``range_from``
    """
    logger = logging.getLogger(__name__)
    acceptable_units = ['hours', 'days', 'weeks', 'months', 'years']
    if unit not in acceptable_units:
        raise ConfigurationError(f'"unit" must be one of: {acceptable_units}')
    if not range_to >= range_from:
        raise ConfigurationError('"range_to" must be greater than or equal to "range_from"')
    if not epoch:
        epoch = time.time()
    epoch = fix_epoch(epoch)
    raw_point_of_ref = datetime.utcfromtimestamp(epoch)
    logger.debug('Raw point of Reference = %s', raw_point_of_ref)
    # Reverse the polarity, because -1 as last week makes sense when read by
    # humans, but subtracting a timedelta needs a positive value to go back.
    origin = range_from * -1
    # This is the number of units we need to consider.
    count = (range_to - range_from) + 1
    # Start of the reference day; every unit truncates the reference to a boundary
    midnight = datetime(
        raw_point_of_ref.year, raw_point_of_ref.month, raw_point_of_ref.day, 0, 0, 0)

    # Work out the first moment of the range
    if unit == 'hours':
        start_date = midnight.replace(hour=raw_point_of_ref.hour) - timedelta(hours=origin)
    elif unit == 'days':
        start_date = midnight - timedelta(days=origin)
    elif unit == 'weeks':
        # datetime.weekday() counts Monday as 0 through Sunday as 6
        weekday = midnight.weekday()
        if week_starts_on.lower() == 'sunday':
            # Days elapsed since the most recent Sunday. The modulo matters when the
            # reference day *is* a Sunday: the week starts today (0 days back), not a
            # full 7 days earlier.
            weekday = (weekday + 1) % 7
        logger.debug('Weekday = %s', weekday)
        start_date = midnight - timedelta(days=weekday, weeks=origin)
    elif unit == 'months':
        start_year, start_month = _shift_months(
            raw_point_of_ref.year, raw_point_of_ref.month, range_from)
        start_date = datetime(start_year, start_month, 1, 0, 0, 0)
    else:
        # Only 'years' remains after the validation above
        start_date = datetime(raw_point_of_ref.year - origin, 1, 1, 0, 0, 0)

    # By this point, we know our start date and can convert it to epoch time
    start_epoch = datetime_to_epoch(start_date)
    logger.debug('Start ISO8601 = %s', datetime.utcfromtimestamp(start_epoch).isoformat())

    # The end is always one second before the first moment *after* the range
    if unit == 'months':
        # Months vary in length, so step the calendar rather than adding seconds
        end_year, end_month = _shift_months(start_date.year, start_date.month, count)
        end_epoch = datetime_to_epoch(datetime(end_year, end_month, 1, 0, 0, 0)) - 1
    elif unit == 'years':
        end_epoch = datetime_to_epoch(datetime(start_date.year + count, 1, 1, 0, 0, 0)) - 1
    else:
        # Hours, days and weeks have a fixed length. get_point_of_reference subtracts
        # ``count`` units, so a negated count adds unit * count seconds to the start.
        end_epoch = get_point_of_reference(unit, count * -1, epoch=start_epoch) - 1
    logger.debug('End ISO8601 = %s', datetime.utcfromtimestamp(end_epoch).isoformat())
    return (start_epoch, end_epoch)
def datetime_to_epoch(mydate):
    """
    Convert a naive datetime into whole seconds elapsed since 1970-01-01 00:00:00.

    Any sub-second component is discarded by rounding down.

    :param mydate: A Python datetime
    :type mydate: :py:class:`~.datetime.datetime`

    :returns: An epoch timestamp based on ``mydate``
    :rtype: int
    """
    elapsed = mydate - datetime(1970, 1, 1)
    # Floor-dividing by one second yields the integer count of whole seconds
    return elapsed // timedelta(seconds=1)
def fix_epoch(epoch):
    """
    Fix value of ``epoch`` to be the count since the epoch in seconds only, which should be 10 or
    fewer digits long.

    Values of 11 to 13 characters are divided by 1000 (treated as milliseconds). Longer values
    are divided by whatever power of ten leaves 10 characters. The division is done with
    integers, so large (e.g. nanosecond) timestamps are not rounded through a float.

    :param epoch: An epoch timestamp, in epoch + milliseconds, or microsecond, or even nanoseconds.
    :type epoch: int

    :returns: An epoch timestamp in seconds only, based on ``epoch``
    :rtype: int
    :raises ValueError: if ``epoch`` cannot be converted to :py:class:`int`
    """
    try:
        # No decimals allowed
        epoch = int(epoch)
    except Exception as err:
        raise ValueError(f'Bad epoch value. Unable to convert {epoch} to int. {err}') from err

    # If we're still using this script past January, 2038, we have bigger
    # problems than my hacky math here...
    width = len(str(epoch))
    if width <= 10:
        # Epoch is fine, no changes
        return epoch
    divisor = 1000 if width <= 13 else 10 ** (width - 10)
    # Integer division on the magnitude truncates toward zero, like int(x / y) would,
    # but without the precision loss of an intermediate float.
    seconds = abs(epoch) // divisor
    return seconds if epoch >= 0 else -seconds
def get_date_regex(timestring):
    """
    Translate an ``strftime`` pattern into a regular expression string.

    A ``%`` followed by a character known to ``date_regex()`` becomes ``\\d{N}``, where ``N``
    is the value ``date_regex()`` holds for that character. Literal ``.`` and ``-`` are
    backslash-escaped. Every other character (other than ``%`` itself, which is dropped) is
    copied through unchanged.

    :param timestring: An ``strftime`` pattern
    :type timestring: :py:func:`~.time.strftime`

    :returns: A regex string based on a provided :py:func:`~.time.strftime` ``timestring``.
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    logger.debug('Provided timestring = "%s"', timestring)
    pieces = []
    previous = ''
    for position, symbol in enumerate(timestring):
        logger.debug('Current character: %s Array position: %s', symbol, position)
        if symbol != '%':
            if symbol in date_regex() and previous == '%':
                pieces.append(r'\d{' + date_regex()[symbol] + '}')
            elif symbol in ('.', '-'):
                pieces.append('\\' + symbol)
            else:
                pieces.append(symbol)
        # Remember this character so the next one can tell whether it follows a '%'
        previous = symbol
    regex = ''.join(pieces)
    logger.debug('regex = %s', regex)
    return regex
def get_datemath(client, datemath, random_element=None):
    """
    Have Elasticsearch render a datemath expression by asking for a deliberately non-existent
    index and reading the rendered name back out of the resulting "not found" error.

    :param client: A client connection object
    :param datemath: An elasticsearch datemath string
    :param random_element: This allows external randomization of the name and is only useful for
        tests so that you can guarantee the output because you provided the random portion.

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type datemath: :py:class:`~.datemath.datemath`
    :type random_element: str

    :returns: the parsed index name from ``datemath``
    :rtype: str
    :raises ConfigurationError: if the rendered name cannot be recovered from the response
    """
    logger = logging.getLogger(__name__)
    prefix_base = 'curator_get_datemath_function_'
    if random_element is not None:
        random_prefix = prefix_base + random_element
    else:
        random_prefix = prefix_base + ''.join(
            random.choice(string.ascii_lowercase) for _ in range(32)
        )
    datemath_dummy = f'<{random_prefix}-{datemath}>'
    # We both want and expect a 404 here (NotFoundError), since the 32 character random
    # string should make this an unknown index name.
    logger.debug('Random datemath string for extraction: %s', datemath_dummy)
    faux_index = ''
    try:
        client.indices.get(index=datemath_dummy)
    except NotFoundError as err:
        # This is the magic. The error body read here carries the formatted index name.
        faux_index = err.body['error']['index']
    logger.debug('Response index name for extraction: %s', faux_index)
    # Now we strip the random index prefix back out again
    extractor = re.compile(f'^{random_prefix}-(.*)$')
    try:
        # And return only the now-parsed date string. A failed match is None, so the
        # attribute access below is what raises AttributeError.
        return extractor.match(faux_index).group(1)
    except AttributeError as exc:
        raise ConfigurationError(
            f'The rendered index "{faux_index}" does not contain a valid date pattern '
            f'or has invalid index name characters.'
        ) from exc
def get_datetime(index_timestamp, timestring):
    """
    Parse ``index_timestamp`` with ``timestring``, padding both where the pattern alone does
    not identify a single day.

    * Week-of-year patterns (``%W``, ``%U`` or ``%V``) get ``%w`` appended to the pattern and
      ``1`` (Monday) appended to the timestamp.
    * Patterns with ``%m`` but without ``%d`` get ``%d`` and ``1`` appended, i.e. the first day
      of the month.

    :param index_timestamp: The index timestamp
    :param timestring: An ``strftime`` pattern

    :type index_timestamp: str
    :type timestring: :py:func:`~.time.strftime`

    :returns: The datetime extracted from the index name, which is the index creation time.
    :rtype: :py:class:`~.datetime.datetime`
    """
    iso_week_number = False
    uses_week_of_year = any(code in timestring for code in ('%W', '%U', '%V'))
    if uses_week_of_year:
        # Compensate for week of year by pinning the day of the week to Monday
        timestring += '%w'
        index_timestamp += '1'
        if '%V' in timestring and '%G' in timestring:
            iso_week_number = True
            # Parse as a Gregorian year/week for now; the ISO offset is corrected below
            timestring = timestring.replace("%G", "%Y").replace("%V", "%W")
    elif '%m' in timestring and '%d' not in timestring:
        timestring += '%d'
        index_timestamp += '1'

    mydate = datetime.strptime(index_timestamp, timestring)

    # Handle ISO time string
    if iso_week_number:
        mydate = handle_iso_week_number(mydate, timestring, index_timestamp)

    return mydate
def get_point_of_reference(unit, count, epoch=None):
    """
    Subtract ``count`` units' worth of seconds from a reference timestamp.

    Months are reckoned as 30 days and years as 365 days. A negative ``count`` therefore moves
    the result forward in time.

    :param unit: One of ``seconds``, ``minutes``, ``hours``, ``days``, ``weeks``, ``months``, or
        ``years``.
    :param count: The number of ``units``. ``count`` * ``unit`` will be calculated out to the
        relative number of seconds.
    :param epoch: An epoch timestamp used in conjunction with ``unit`` and ``count`` to
        establish a point of reference for calculations. When not set (or zero), the current
        time is used.

    :type unit: str
    :type count: int
    :type epoch: int

    :returns: A point-of-reference timestamp in epoch seconds, derived from a ``unit`` and a
        ``count``, and an optional reference timestamp, ``epoch``
    :rtype: int
    :raises ValueError: if ``unit`` is not one of the recognized names
    """
    seconds_per_unit = (
        ('seconds', 1),
        ('minutes', 60),
        ('hours', 3600),
        ('days', 3600*24),
        ('weeks', 3600*24*7),
        ('months', 3600*24*30),
        ('years', 3600*24*365),
    )
    for name, multiplier in seconds_per_unit:
        if unit == name:
            break
    else:
        # The loop finished without finding a matching unit name
        raise ValueError(f'Invalid unit: {unit}.')
    # Use this moment as a reference point, if one is not provided.
    if not epoch:
        epoch = time.time()
    return fix_epoch(epoch) - multiplier * count
def get_unit_count_from_name(index_name, pattern):
    """
    Extract an integer unit count from ``index_name`` using the first capture group of
    ``pattern``.

    :param index_name: An index name
    :param pattern: A compiled regular expression (anything offering ``search``), or ``None``

    :type index_name: str
    :type pattern: :py:class:`re.Pattern`

    :returns: The unit count, if a match is able to be found in the name, otherwise ``None``
    :rtype: int
    """
    if pattern is None:
        return None
    found = pattern.search(index_name)
    if not found:
        return None
    try:
        return int(found.group(1))
    # Deliberately best-effort: a missing group or a non-numeric capture yields None
    # pylint: disable=broad-except
    except Exception:
        return None
def handle_iso_week_number(mydate, timestring, index_timestamp):
    """
    Correct a date that was parsed with Gregorian ``%Y``/``%W`` codes when the timestamp was
    really written as an ISO year and week number.

    :param mydate: A Python datetime
    :param timestring: An ``strftime`` pattern
    :param index_timestamp: The index timestamp

    :type mydate: :py:class:`~.datetime.datetime`
    :type timestring: :py:func:`~.time.strftime`
    :type index_timestamp: str

    :returns: ``mydate`` moved back by one week when the ISO and Gregorian week reckoning
        disagree, otherwise ``mydate`` unchanged
    :rtype: :py:class:`~.datetime.datetime`
    """
    iso_year, iso_week = mydate.isocalendar()[:2]
    iso_week_str = f'{iso_year:04d}{iso_week:02d}'
    greg_week_str = mydate.strftime('%Y%W')

    # Edge case 1: ISO week number is bigger than Greg week number.
    # Ex: year 2014, all ISO week numbers were 1 more than in Greg.
    iso_runs_ahead = iso_week_str > greg_week_str
    # Edge case 2: 2010-01-01 in ISO: 2009.W53, in Greg: 2010.W00
    # For Greg converting 2009.W53 gives 2010-01-04, converting back
    # to same timestring gives: 2010.W01, which no longer equals the input.
    if iso_runs_ahead or mydate.strftime(timestring) != index_timestamp:
        # Remove one week in this case
        return mydate - timedelta(days=7)
    return mydate
def isdatemath(data):
    """
    Decide whether ``data`` is wrapped in the ``<`` and ``>`` that mark a datemath expression.

    Empty and single-character strings are handled without error: they are classified by
    their first and last character like any other value.

    :param data: An expression to validate as being datemath or not
    :type data: str

    :returns: ``True`` if ``data`` is a valid datemath expression, else ``False``
    :rtype: bool
    :raises ConfigurationError: if only one of the opening ``<`` and closing ``>`` is present
    """
    logger = logging.getLogger(__name__)
    found = re.match(r'^(.).*(.)$', data)
    if found is None:
        # The pattern needs two characters on a single line. For shorter (or multi-line)
        # input, fall back to the first and last character, which are both '' when
        # ``data`` is empty.
        opener, closer = data[:1], data[-1:]
    else:
        opener, closer = found.groups()
    logger.debug('opener = %s, closer = %s', opener, closer)
    opens = opener == '<'
    closes = closer == '>'
    if opens != closes:
        raise ConfigurationError('Incomplete datemath encapsulation in "< >"')
    return opens
def parse_date_pattern(name):
    """
    Scan and parse ``name`` for :py:func:`~.time.strftime` strings, replacing them with the
    associated value for the current UTC time when found. All other characters are copied
    through as they are, except ``%`` itself, which is dropped. If a ``<`` is encountered,
    ``name`` is assumed to be using Elasticsearch date math syntax and is returned unmodified.

    The current time is read once per call, so every identifier in ``name`` is rendered from
    the same instant.

    The :py:func:`~.time.strftime` identifiers that Curator currently recognizes as acceptable
    include:

    * ``Y``: A 4 digit year
    * ``y``: A 2 digit year
    * ``m``: The 2 digit month
    * ``W``: The 2 digit week of the year
    * ``d``: The 2 digit day of the month
    * ``H``: The 2 digit hour of the day, in 24 hour notation
    * ``M``: The 2 digit minute of the hour
    * ``S``: The 2 digit number of second of the minute
    * ``j``: The 3 digit day of the year

    :param name: A name, which can contain :py:func:`~.time.strftime` strings
    :type name: str

    :returns: The parsed date pattern
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    # Capture the clock once so fields rendered early and late in the name cannot end up
    # on opposite sides of a second/minute/day rollover.
    now = datetime.utcnow()
    prev, rendered = ('', '')
    logger.debug('Provided index name: %s', name)
    for idx, char in enumerate(name):
        logger.debug('Current character in provided name: %s, position: %s', char, idx)
        if char == '<':
            logger.info('"%s" is probably using Elasticsearch date math.', name)
            rendered = name
            break
        if char == '%':
            # Emit nothing yet; the next character decides what the '%' meant
            pass
        elif char in date_regex() and prev == '%':
            rendered += now.strftime(f'%{char}')
        else:
            rendered += char
        logger.debug('Partially rendered name: %s', rendered)
        prev = char
    logger.debug('Fully rendered name: %s', rendered)
    return rendered
def parse_datemath(client, value):
    """
    Validate that ``value`` looks like proper datemath. If it passes this test, then try to ship it
    to Elasticsearch for real. It may yet fail this test, and if it does, it will raise a
    :py:exc:`~.curator.exceptions.ConfigurationError` exception. If it passes, return the fully
    parsed string. A ``value`` that is not datemath at all is returned unchanged.

    :param client: A client connection object
    :param value: A string to check for datemath

    :type client: :py:class:`~.elasticsearch.Elasticsearch`
    :type value: str

    :returns: A datemath indexname, fully rendered by Elasticsearch
    :rtype: str
    """
    logger = logging.getLogger(__name__)
    if not isdatemath(value):
        return value
    # if we didn't return here, we can continue, no 'else' needed.
    logger.debug('Properly encapsulated, proceeding to next evaluation...')
    # Our pattern has 4 capture groups.
    # 1. Everything after the initial '<' up to the first '{', which we call ``prefix``
    # 2. Everything between the outermost '{' and '}', which we call ``datemath``
    # 3. An optional inner '{' and '}' containing a date formatter and potentially a timezone.
    #    Counted as a group, but its value is not used.
    # 4. Everything after the last '}' up to the closing '>', which we call ``suffix``
    layout = re.compile(r'^<([^\{\}]*)?(\{.*(\{.*\})?\})([^\{\}]*)?>$')
    try:
        # A failed match is None, so the first ``group`` access raises AttributeError
        parts = layout.match(value)
        prefix = parts.group(1) or ''
        datemath = parts.group(2)
        suffix = parts.group(4) or ''
    except AttributeError as exc:
        raise ConfigurationError(
            f'Value "{value}" does not contain a valid datemath pattern.') from exc

    return f'{prefix}{get_datemath(client, datemath)}{suffix}'