Coverage for /Users/Newville/Codes/xraylarch/larch/utils/strutils.py: 30%
212 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
1#!/usr/bin/env python
2"""
3utilities for larch
4"""
5from __future__ import print_function
6import re
7import sys
8import os
9import uuid
10import hashlib
11from base64 import b64encode, b32encode
12import random
14from packaging import version as pkg_version
def bytes2str(s):
    """Convert bytes (or another bytes-like object) to str.

    A str argument is returned unchanged.  Bytes are decoded with the
    encoding of ``sys.stdout``; when ``sys.stdout`` is missing or reports
    no encoding (for example in a GUI process without a console), UTF-8
    is used instead.

    Raises TypeError if `s` is neither a str nor a bytes-like object.
    """
    if isinstance(s, str):
        return s
    # sys.stdout can be None, or a replacement object without `.encoding`
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    if isinstance(s, bytes):
        return s.decode(encoding)
    return str(s, encoding)
def str2bytes(s):
    """Convert a str to bytes.

    A bytes argument is returned unchanged.  A str is encoded with the
    encoding of ``sys.stdout``; when ``sys.stdout`` is missing or reports
    no encoding (for example in a GUI process without a console), UTF-8
    is used instead.

    Raises TypeError if `s` is neither bytes nor str.
    """
    if isinstance(s, bytes):
        return s
    # sys.stdout can be None, or a replacement object without `.encoding`
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    return bytes(s, encoding)
def strict_ascii(s, replacement='_'):
    """Return `s` reduced to pure ASCII (every character below 128).

    The string is encoded as UTF-8 and each byte with value 128 or above
    is swapped for `replacement`, so one non-ASCII character yields one
    replacement per encoded byte.
    """
    pieces = []
    for byte in bytes(s, 'UTF-8'):
        if byte < 128:
            pieces.append(chr(byte))
        else:
            pieces.append(replacement)
    return ''.join(pieces)
# Words that isValidName() refuses as names.
RESERVED_WORDS = (
    'False', 'None', 'True', 'and', 'as', 'assert', 'async', 'await',
    'break', 'class', 'continue', 'def', 'del', 'elif', 'else', 'end',
    'enddef', 'endfor', 'endif', 'endtry', 'endwhile', 'eval', 'except',
    'exec', 'execfile', 'finally', 'for', 'from', 'global', 'group', 'if',
    'import', 'in', 'is', 'lambda', 'nonlocal', 'not', 'or', 'pass',
    'print', 'raise', 'return', 'try', 'while', 'with', 'yield',
)
# Matcher for one or more dot-separated identifiers.  The pattern is
# anchored with \Z rather than $, because $ also matches just before a
# trailing newline and would accept names such as "abc\n".
NAME_MATCH = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*\Z").match

# Characters allowed in a simple name, in a dotted name, and as the first
# character of a variable name, respectively.
VALID_SNAME_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
VALID_NAME_CHARS = '.%s' % VALID_SNAME_CHARS
VALID_CHARS1 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'

# Characters replaced by '_' when building file names.
BAD_FILECHARS = ';~,`!%$@$&^?*#:"/|\'\\\t\r\n (){}[]<>'
GOOD_FILECHARS = '_'*len(BAD_FILECHARS)

# Variable names additionally lose '=', '+', '-' and '.'.
BAD_VARSCHARS = BAD_FILECHARS + '=+-.'
GOOD_VARSCHARS = '_'*len(BAD_VARSCHARS)

# Translation tables for str.translate(): every bad character maps to '_'.
TRANS_FILE = str.maketrans(BAD_FILECHARS, GOOD_FILECHARS)
TRANS_VARS = str.maketrans(BAD_VARSCHARS, GOOD_VARSCHARS)
def PrintExceptErr(err_str, print_trace=True):
    """Print `err_str` and the exception currently being handled.

    A banner, `err_str` and the exception type are printed to stdout.
    The exception itself is then reported on stderr: through
    ``sys.excepthook`` (with traceback) when `print_trace` is true,
    or as the bare "Type: message" line when it is false.

    Intended to be called from inside an ``except`` block; when no
    exception is active only the banner and `err_str` lines are shown.
    """
    import traceback

    print('\n***********************************')
    print(err_str)
    # sys.exc_type was removed in Python 3; sys.exc_info() is the
    # supported way to get the exception being handled.
    etype, evalue, tback = sys.exc_info()
    try:
        print('Error: %s' % etype)
        if etype is not None:
            if print_trace:
                sys.excepthook(etype, evalue, tback)
            else:
                # excepthook would still show evalue.__traceback__, so
                # format only the final "Type: message" line ourselves.
                lines = traceback.format_exception_only(etype, evalue)
                sys.stderr.write(''.join(lines))
    except Exception:
        print('Error printing exception error!!')
        raise
    print('***********************************\n')
def strip_comments(sinp, char='#'):
    """Remove a trailing comment from `sinp`, ignoring quoted text.

    The string is scanned left to right.  Text between a quote character
    (single or double) and the next occurrence of that same quote is
    skipped.  At the first unquoted `char`, everything from there on is
    dropped and trailing whitespace is stripped from the result.

    A quote with no matching closing quote is treated as an ordinary
    character.  If no comment character is found, `sinp` is returned
    unchanged.
    """
    if sinp.find(char) < 0:
        return sinp
    i = 0
    while i < len(sinp):
        tchar = sinp[i]
        if tchar in ('"', "'"):
            closing = sinp.find(tchar, i + 1)
            if closing > i:
                # jump onto the closing quote; the increment below then
                # moves past it so it is not re-read as an opening quote
                i = closing
        elif tchar == char:
            return sinp[:i].rstrip()
        i += 1
    return sinp
def strip_quotes(t):
    """Strip one layer of matching quotes from around `t`.

    Triple quotes are tried first, then single-character quotes.  Objects
    without a ``startswith`` attribute, and strings that are not wrapped
    in matching quotes, are returned unchanged.
    """
    if not hasattr(t, 'startswith'):
        return t
    for quote in ('"""', "'''"):
        if t.startswith(quote) and t.endswith(quote):
            return t[3:-3]
    for quote in ('"', "'"):
        if t.startswith(quote) and t.endswith(quote):
            return t[1:-1]
    return t
def isValidName(name):
    """Return True if `name` is a valid name.

    A name is valid when it is not one of RESERVED_WORDS and its
    lower-cased form is accepted by NAME_MATCH.
    """
    return name not in RESERVED_WORDS and NAME_MATCH(name.lower()) is not None
def fixName(name, allow_dot=True):
    """Try to turn `name` into a valid name.

    Steps, in order:
      1. return `name` unchanged if it is already valid;
      2. return `name` with a leading underscore if that is valid;
      3. replace every character outside the allowed set with '_', and
         prefix an underscore if the result is still not valid.

    When `allow_dot` is False, '.' is not an allowed character, so steps
    1 and 2 are skipped for names containing a dot; otherwise an
    already-valid dotted name would come back with its dots intact.
    """
    if allow_dot or '.' not in name:
        if isValidName(name):
            return name
        prefixed = '_%s' % name
        if isValidName(prefixed):
            return prefixed

    valid_chars = VALID_NAME_CHARS if allow_dot else VALID_SNAME_CHARS
    name = ''.join(c if c in valid_chars else '_' for c in name)
    # last check (name may begin with a number or .)
    if not isValidName(name):
        name = '_%s' % name
    return name
def fix_filename(s):
    """Return `s` converted to a 'good' filename.

    Every character listed in BAD_FILECHARS becomes '_', and every '.'
    except the last one is also replaced by '_'.  This may be more
    restrictive than the OS requires, but avoids nasty cases.
    """
    cleaned = str(s).translate(TRANS_FILE)
    # keep only the final dot; dots in the leading part become '_'
    stem, dot, tail = cleaned.rpartition('.')
    return stem.replace('.', '_') + dot + tail
def fix_varname(s):
    """Return `s` converted to a 'good' variable name.

    Characters listed in BAD_VARSCHARS are replaced by '_', a leading
    underscore is added when the first character is not a letter or
    underscore, and trailing underscores are removed.

    Empty input, or input that reduces to nothing but underscores,
    yields '_unlabeled' so the result is never an empty string.
    """
    t = str(s).translate(TRANS_VARS)

    if len(t) < 1:
        t = '_unlabeled_'
    if t[0] not in VALID_CHARS1:
        t = '_%s' % t
    t = t.rstrip('_')
    if not t:
        # everything was stripped away: fall back to the placeholder
        t = '_unlabeled'
    return t
def common_startstring(words):
    """Return the longest string that every word in `words` starts with.

    Returns '' when the words share no leading characters or when
    `words` is empty.
    """
    if not words:
        return ''
    prefix = words[0]
    for word in words[1:]:
        nmatch = 0
        for a, b in zip(prefix, word):
            if a != b:
                break
            nmatch += 1
        # also truncates when `word` is shorter than the current prefix
        prefix = prefix[:nmatch]
        if not prefix:
            break
    return prefix
def unique_name(name, nlist, max=1000):
    """Return a variant of `name` that is not in `nlist`.

    If `name` is already absent from `nlist` it is returned as is.
    Otherwise the suffixes _1, _2, ... up to _`max` are tried in order
    and the first unused candidate is returned.  If every candidate is
    taken, the last one tried is returned.

    >>> unique_name('foo', ['bar', 'baz'])
    'foo'

    >>> unique_name('foo', ['foo', 'bar', 'baz'])
    'foo_1'

    """
    if name not in nlist:
        return name
    candidate = name
    for suffix in range(1, max + 1):
        candidate = "%s_%i" % (name, suffix)
        if candidate not in nlist:
            return candidate
    return candidate
def isNumber(num):
    """Return True if `num` can be converted with float().

    A TypeError or ValueError from the conversion means "not a number".
    """
    try:
        float(num)
    except (TypeError, ValueError):
        return False
    return True
def asfloat(x):
    """Return `x` converted to float, or `x` itself if that fails.

    Values that float() rejects with TypeError or ValueError are handed
    back unchanged.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        return x
def isLiteralStr(inp):
    """Return True if `inp` begins and ends with the same quote character.

    Both the single quote and the double quote are recognized.
    """
    for quote in ("'", '"'):
        if inp.startswith(quote) and inp.endswith(quote):
            return True
    return False
def find_delims(s, delim='"', match=None):
    """Find a pair of matching delimiters (quotes, braces, etc) in a string.

    Returns
        True, index1, index2    if a match is found
        False, index1, len(s)   if a match is not found

    where index1 is the position of the first `delim` (-1 if absent) and
    index2 is the position of the last character of the closing delimiter.

    The opening delimiter is set with `delim` and the closing one with
    `match`; if `match` is None (default) it is set to `delim`.  A closing
    delimiter preceded by a single backslash is skipped, unless the two
    characters before it are both backslashes.

    >>> find_delims(mystr, delim=":")
    >>> find_delims(mystr, delim='<', match='>')
    """
    backslash, double_backslash = "\\", "\\\\"
    closer = delim if match is None else match
    start = s.find(delim)
    if start < 0:
        return False, start, len(s)

    width = len(closer)
    for pos in range(start + 1, len(s)):
        prev1 = s[pos-1:pos]
        # two-character look-behind exists only from index 2 onwards
        prev2 = s[pos-2:pos] if pos > 1 else None
        escaped = (prev1 == backslash and prev2 != double_backslash)
        if s[pos:pos+width] == closer and not escaped:
            return True, start, pos + width - 1
    return False, start, len(s)
def version_ge(v1, v2):
    """Return whether version string `v1` >= version string `v2`.

    Both strings are parsed with ``packaging.version.parse`` and the
    parsed versions are compared.
    """
    first = pkg_version.parse(v1)
    second = pkg_version.parse(v2)
    return first >= second
def b32hash(s):
    """Return the base32-encoded SHA-256 hash of `s`, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b32encode(digest))
def b64hash(s):
    """Return the base64-encoded SHA-256 hash of `s`, as a str."""
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b64encode(digest))
def get_sessionid():
    """Return an 8 character string derived from machine and process id.

    The value of ``uuid.getnode()`` and the current process id are
    hashed with SHA-256; characters 3 to 10 of the base64-encoded digest
    are kept, with '/' replaced by '-' and '+' replaced by '='.
    """
    seed = f"{uuid.getnode():d} {os.getpid():d}".encode('ASCII')
    digest = hashlib.sha256(seed).digest()
    token = b64encode(digest).decode('ASCII')[3:11]
    return token.replace('/', '-').replace('+', '=')
def random_varname(n):
    """Return a random lower-case name of `n` characters.

    The first character is always a letter; the remaining n-1 are
    letters or digits.  At least one character is returned even when
    `n` is less than 1.
    """
    letters = 'abcdefghijklmnopqrstuvwxyz'
    alphanum = letters + '0123456789'
    first = random.choice(letters)
    rest = [random.choice(alphanum) for _ in range(n - 1)]
    return first + ''.join(rest)
def file2groupname(filename, slen=9, minlen=2, symtable=None):
    """Create a group name based on a filename.

    The filename is passed through fix_varname(), lower-cased, and
    stripped of underscores.  The result is made to start with a letter
    (a random one of a-g is prepended if needed, including when nothing
    is left of the filename), padded with random characters up to
    `minlen`, and truncated to `slen` characters.

    When `symtable` is given, a 2 digit number (01 to 99) is appended
    and incremented for as long as `symtable` already has an attribute
    with the candidate name.

    Arguments
    ---------
      filename  (str)  filename to use
      slen      (int)  maximum length of string portion (default 9)
      minlen    (int)  minimum length of string portion (default 2)
      symtable  (None or larch symbol table) symbol table for
                 checking that the group name is unique
    """
    gname = fix_varname(filename).lower().replace('_', '')

    # gname can be empty here (e.g. filename '_'), so test before indexing
    if not gname or gname[0] not in 'abcdefghijklmnopqrstuvwxyz':
        gname = random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g']) + gname
    if len(gname) < minlen:
        gname = gname + random_varname(minlen-len(gname))

    gname = gname[:slen]
    if symtable is None:
        return gname

    gbase = gname
    scount, count, n = 0, 0, 2
    while hasattr(symtable, gname):
        count += 1
        if count == 100:
            # all of 01..99 are taken for this base: start a new round
            count = 1
            scount += 1
            if scount > 200:
                # many rounds failed: extend the base with random characters
                scount = 0
                n = n + 1
                gbase = gname + random_varname(n)
        gname = f"{gbase}{count:02d}"
    return gname
def break_longstring(s, maxlen=90, n1=20):
    """Break a long string into a list of smaller strings.

    Each piece is at most `maxlen` characters.  A break is placed just
    after a comma, space, tab, period, or slash found within the last
    `n1` characters of the allowed length (separators are tried in that
    order of preference).  If none is found there, the string is cut at
    exactly `maxlen` characters.

    Returns a list of strings, even if length 1; joining the list
    reproduces `s`.
    """
    maxlen = max(1, maxlen)        # guarantees progress on every pass
    minlen = max(0, maxlen - n1)

    if len(s) < maxlen:
        return [s]
    out = []
    while len(s) > maxlen:
        cut = maxlen               # hard break when no separator is found
        for sep in (',', ' ', '\t', '.', '/'):
            # search only the window [minlen, maxlen) so a piece can
            # never be longer than maxlen
            idx = s.find(sep, minlen, maxlen)
            if idx >= 0:
                cut = idx + 1      # keep the separator on the left piece
                break
        out.append(s[:cut])
        s = s[cut:]
    out.append(s)
    return out