Coverage for /home/martinb/.local/share/virtualenvs/camcops/lib/python3.6/site-packages/pygments/regexopt.py : 26%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# -*- coding: utf-8 -*-
2"""
3 pygments.regexopt
4 ~~~~~~~~~~~~~~~~~
6 An algorithm that generates optimized regexes for matching long lists of
7 literal strings.
9 :copyright: Copyright 2006-2021 by the Pygments team, see AUTHORS.
10 :license: BSD, see LICENSE for details.
11"""
13import re
14from re import escape
15from os.path import commonprefix
16from itertools import groupby
17from operator import itemgetter
# Characters that must be backslash-escaped when placed inside a regex
# character class: '[', '^', '\', '-' and ']'.
# '[' is included because an unescaped '[' directly after the opening bracket
# (e.g. '[[a]') makes Python 3.7+ emit "FutureWarning: Possible nested set".
CS_ESCAPE = re.compile(r'[\[\^\\\-\]]')

# Key function returning item 0 of a sequence.
# NOTE(review): not referenced in the visible part of this module.
FIRST_ELEMENT = itemgetter(0)
def make_charset(letters):
    """Return a regex character class matching any one of *letters*.

    *letters* is an iterable of strings; they are concatenated, every
    character matched by ``CS_ESCAPE`` gets a backslash put in front of it,
    and the result is wrapped in square brackets.
    """
    def _backslash(match):
        # Prefix the matched special character with a single backslash.
        return '\\' + match.group()

    members = ''.join(letters)
    escaped = CS_ESCAPE.sub(_backslash, members)
    return '[' + escaped + ']'
def regex_opt_inner(strings, open_paren):
    """Return a regex that matches any string in the sorted list of strings.

    :param strings: list of literal strings, already sorted.  Only the first
        element is inspected to detect an empty string or a one-character
        string, so unsorted input is not handled specially.
    :param open_paren: text that opens the group wrapped around the result
        (``regex_opt`` passes ``'('``, the recursive calls pass ``'(?:'`` or
        ``''``).  When it is non-empty a matching ``')'`` is appended;
        when it is empty the result is left unwrapped.
    :return: the regex source as a string (``''`` for an empty list).
    """
    close_paren = ')' if open_paren else ''
    if not strings:
        return ''
    first = strings[0]
    if len(strings) == 1:
        return open_paren + escape(first) + close_paren
    if not first:
        # The leading empty string means everything that follows is optional.
        return (open_paren + regex_opt_inner(strings[1:], '(?:')
                + '?' + close_paren)
    if len(first) == 1:
        # Multiple one-char strings?  Make a charset.
        oneletter = [s for s in strings if len(s) == 1]
        if len(oneletter) > 1:
            rest = [s for s in strings if len(s) != 1]
            if rest:
                # Longer strings are tried first, the charset comes last.
                return (open_paren + regex_opt_inner(rest, '') + '|'
                        + make_charset(oneletter) + close_paren)
            return open_paren + make_charset(oneletter) + close_paren
    prefix = commonprefix(strings)
    if prefix:
        # All strings share a prefix: emit it once, recurse on the tails.
        plen = len(prefix)
        return (open_paren + escape(prefix)
                + regex_opt_inner([s[plen:] for s in strings], '(?:')
                + close_paren)
    # Is there a common suffix?  Find it as the common prefix of the
    # reversed strings.
    strings_rev = [s[::-1] for s in strings]
    suffix = commonprefix(strings_rev)
    if suffix:
        slen = len(suffix)
        # Stripping the suffix can break the ordering, so sort again.
        return (open_paren
                + regex_opt_inner(sorted(s[:-slen] for s in strings), '(?:')
                + escape(suffix[::-1]) + close_paren)
    # Last resort: split into runs of strings that do / do not start with
    # the first string's first character and join the runs as alternatives.
    return (open_paren
            + '|'.join(regex_opt_inner(list(group[1]), '')
                       for group in groupby(strings,
                                            lambda s: s[0] == first[0]))
            + close_paren)
def regex_opt(strings, prefix='', suffix=''):
    """Return a regex pattern string matching any string in the given list.

    The strings to match must be literal strings, not regexes.  They will be
    regex-escaped.  The input is sorted before the pattern is built, so it
    may be given in any order.

    *prefix* and *suffix* are placed before and after the generated group
    unchanged.
    """
    ordered = list(strings)
    ordered.sort()
    body = regex_opt_inner(ordered, '(')
    return prefix + body + suffix