Coverage for /Users/Newville/Codes/xraylarch/larch/utils/strutils.py: 30%

212 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2""" 

3utilities for larch 

4""" 

5from __future__ import print_function 

6import re 

7import sys 

8import os 

9import uuid 

10import hashlib 

11from base64 import b64encode, b32encode 

12import random 

13 

14from packaging import version as pkg_version 

15 

def bytes2str(s):
    """Convert bytes (or another bytes-like object) to str.

    Arguments
    ---------
    s    str, bytes, or an object supporting the buffer protocol

    Returns
    -------
    `s` itself when it is already a str, otherwise the decoded text.

    The encoding reported by sys.stdout is used for decoding; when
    sys.stdout is missing or reports no encoding, UTF-8 is used instead.
    """
    if isinstance(s, str):
        return s
    # sys.stdout may be None (no console attached) or a replacement object
    # without a usable `encoding`, so fall back to UTF-8 instead of failing.
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    if isinstance(s, bytes):
        return s.decode(encoding)
    return str(s, encoding)

22 

def str2bytes(s):
    """Convert a str to bytes.

    Arguments
    ---------
    s    str or bytes

    Returns
    -------
    `s` itself when it is already bytes, otherwise the encoded text.

    The encoding reported by sys.stdout is used; when sys.stdout is
    missing or reports no encoding, UTF-8 is used instead.
    """
    if isinstance(s, bytes):
        return s
    # sys.stdout may be None (no console attached) or a replacement object
    # without a usable `encoding`, so fall back to UTF-8 instead of failing.
    encoding = getattr(sys.stdout, 'encoding', None) or 'utf-8'
    return bytes(s, encoding)

28 

29 

def strict_ascii(s, replacement='_'):
    """Return `s` reduced to characters with code points below 128.

    The string is encoded as UTF-8 and rebuilt byte by byte: bytes below
    128 are kept as characters, every byte at or above 128 is replaced
    by `replacement` (so one non-ASCII character produces one
    `replacement` per encoded byte).
    """
    pieces = []
    for byte in bytes(s, 'UTF-8'):
        pieces.append(replacement if byte >= 128 else chr(byte))
    return ''.join(pieces)

34 

35 

# Words that isValidName() always rejects as names.
RESERVED_WORDS = (
    'False', 'None', 'True',
    'and', 'as', 'assert', 'async', 'await',
    'break', 'class', 'continue',
    'def', 'del',
    'elif', 'else',
    'end', 'enddef', 'endfor', 'endif', 'endtry', 'endwhile',
    'eval', 'except', 'exec', 'execfile',
    'finally', 'for', 'from',
    'global', 'group',
    'if', 'import', 'in', 'is',
    'lambda', 'nonlocal', 'not',
    'or', 'pass', 'print',
    'raise', 'return', 'try',
    'while', 'with', 'yield',
)

43 

44 

# Matcher for dotted names: identifier-like parts joined by single dots.
NAME_MATCH = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]*(\.[a-zA-Z_][a-zA-Z0-9_]*)*$").match

# Characters accepted in a simple name, in a dotted name, and as the
# first character of a variable name.
VALID_SNAME_CHARS = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_'
VALID_NAME_CHARS = '.%s' % VALID_SNAME_CHARS
VALID_CHARS1 = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'

# Characters replaced by '_' in file names; variable names additionally
# replace '=', '+', '-' and '.'.
BAD_FILECHARS = ';~,`!%$@$&^?*#:"/|\'\\\t\r\n (){}[]<>'
BAD_VARSCHARS = BAD_FILECHARS + '=+-.'

GOOD_FILECHARS = '_' * len(BAD_FILECHARS)
GOOD_VARSCHARS = '_' * len(BAD_VARSCHARS)

# Translation tables for str.translate().
TRANS_FILE = str.maketrans(BAD_FILECHARS, GOOD_FILECHARS)
TRANS_VARS = str.maketrans(BAD_VARSCHARS, GOOD_VARSCHARS)

58 

59 

def PrintExceptErr(err_str, print_trace=True):
    """Print a banner, a message, and the exception currently being handled.

    Arguments
    ---------
    err_str      message printed before the exception report
    print_trace  if True (default) the exception is reported through
                 sys.excepthook, including its traceback; otherwise only
                 the final "ExceptionType: message" line is written to
                 sys.stderr

    The exception is read from sys.exc_info(), so this is meant to be
    called from inside an `except` block.
    """
    import traceback

    print('\n***********************************')
    print(err_str)
    try:
        # sys.exc_type does not exist in Python 3; sys.exc_info() is the
        # supported way to read the exception being handled.
        etype, evalue, tback = sys.exc_info()
        print('Error: %s' % etype)
        if print_trace:
            sys.excepthook(etype, evalue, tback)
        else:
            # The default excepthook prints the traceback attached to the
            # exception object regardless of the traceback argument, so
            # format the summary line directly.
            sys.stderr.write(''.join(
                traceback.format_exception_only(etype, evalue)))
    except Exception:
        print('Error printing exception error!!')
        raise
    print('***********************************\n')

75 

def strip_comments(sinp, char='#'):
    """Remove a trailing comment from a line, skipping over quoted text.

    Arguments
    ---------
    sinp   input string
    char   character that starts a comment (default '#')

    Returns
    -------
    The text before the first `char` found outside of single- or
    double-quoted text, with trailing whitespace removed, or `sinp`
    unchanged when no such character is found.

    Backslash-escaped quotes are not recognised, and a quote without a
    matching closing quote is scanned as ordinary text.
    """
    if sinp.find(char) < 0:
        return sinp
    i = 0
    while i < len(sinp):
        tchar = sinp[i]
        if tchar in ('"', "'"):
            closing = sinp.find(tchar, i + 1)
            if closing > 0:
                # Jump onto the closing quote; the increment below then
                # steps past it, so it is not mistaken for a new opening
                # quote.
                i = closing
        elif tchar == char:
            return sinp[:i].rstrip()
        i += 1
    return sinp

91 

def strip_quotes(t):
    """Remove one layer of matching quotes from around a string.

    Triple quotes (three double or three single quote characters) are
    tried first, then single-character quotes. Objects without a
    `startswith` attribute, and strings that are not quoted, are
    returned unchanged.
    """
    if not hasattr(t, 'startswith'):
        return t
    for quote in ('"""', "'''", '"', "'"):
        if t.startswith(quote) and t.endswith(quote):
            width = len(quote)
            return t[width:-width]
    return t

102 

def isValidName(name):
    """Return True if `name` is acceptable as a (possibly dotted) name.

    A name is rejected when it appears in RESERVED_WORDS (compared
    exactly, case included) or when its lower-cased form does not match
    the NAME_MATCH pattern.
    """
    return (name not in RESERVED_WORDS
            and NAME_MATCH(name[:].lower()) is not None)

109 

def fixName(name, allow_dot=True):
    """Try to turn a string into a valid name.

    Arguments
    ---------
    name        candidate name
    allow_dot   whether '.' is kept when invalid characters are replaced

    The steps are tried in order: return `name` if it is already valid;
    return '_' + name if that is valid; otherwise replace every
    character outside the allowed set with '_' and, if the result is
    still not valid, prefix it with '_'.
    """
    if isValidName(name):
        return name

    prefixed = '_%s' % name
    if isValidName(prefixed):
        return prefixed

    allowed = VALID_NAME_CHARS if allow_dot else VALID_SNAME_CHARS
    cleaned = ''.join(c if c in allowed else '_' for c in name)
    # the cleaned name may still begin with a number or a dot
    return cleaned if isValidName(cleaned) else '_%s' % cleaned

130 

131 

def fix_filename(s):
    """Fix a string to be a 'good' filename.

    Every character listed in BAD_FILECHARS is replaced by '_', and if
    more than one '.' remains, all but the last are replaced by '_' too.
    This may be more restrictive than the OS, but avoids nasty cases.
    """
    cleaned = str(s).translate(TRANS_FILE)
    # Split at the last dot: only dots in front of it are replaced.
    stem, dot, tail = cleaned.rpartition('.')
    return stem.replace('.', '_') + dot + tail

142 

def fix_varname(s):
    """Fix a string to be a 'good' variable name.

    Every character listed in BAD_VARSCHARS is replaced by '_', trailing
    underscores are removed, and '_' is prepended when the first
    character is not a letter or underscore.

    Returns
    -------
    A non-empty string. '_unlabeled' is returned when nothing is left
    after the replacement (empty input, or input made only of replaced
    characters and underscores).
    """
    t = str(s).translate(TRANS_VARS).rstrip('_')
    if not t:
        # Previously an all-underscore result was stripped down to '',
        # which is not usable as a name.
        return '_unlabeled'
    if t[0] not in VALID_CHARS1:
        t = '_%s' % t
    return t

154 

def common_startstring(words):
    """Return the common starting substring for a list of words.

    Arguments
    ---------
    words   sequence of strings

    Returns
    -------
    The longest string that every word starts with; '' when the words
    share no prefix or when `words` is empty.
    """
    if len(words) == 0:
        return ''
    out = words[0]
    for word in words[1:]:
        nmatch = 0
        for a, b in zip(out, word):
            if a != b:
                break
            nmatch += 1
        # Truncate even without a mismatch: `word` may simply be shorter
        # than the prefix found so far.
        out = out[:nmatch]
        if not out:
            break
    return out

166 

167 

def unique_name(name, nlist, max=1000):
    """Return a name that is not in a list, appending _1, _2, ... as needed.

    Suffixes from 1 up to `max` are tried in order and the first unused
    candidate is returned. If all of them are already in `nlist`, the
    last candidate tried is returned even though it is in the list.

    >>> unique_name('foo', ['bar', 'baz'])
    'foo'

    >>> unique_name('foo', ['foo', 'bar', 'baz'])
    'foo_1'

    """
    if name not in nlist:
        return name
    candidate = name
    for suffix in range(1, max + 1):
        candidate = "%s_%i" % (name, suffix)
        if candidate not in nlist:
            return candidate
    return candidate

186 

187 

def isNumber(num):
    """Return True if `num` can be converted to a float.

    Values that float() rejects (TypeError, ValueError) or that are too
    large to be represented as a float (OverflowError) give False.
    """
    try:
        float(num)
    except (TypeError, ValueError, OverflowError):
        return False
    return True

195 

def asfloat(x):
    """Try to convert a value to float, or fail gracefully.

    Returns float(x) when the conversion succeeds; otherwise `x` itself
    is returned unchanged (this includes integers too large for a float).
    """
    try:
        return float(x)
    except (TypeError, ValueError, OverflowError):
        return x

199 

200 

201 

def isLiteralStr(inp):
    """Return True if `inp` is a quoted literal string.

    The string must begin and end with the same quote character
    (single or double) and be at least two characters long, so that a
    lone quote character is not counted as both the opening and the
    closing quote.
    """
    if len(inp) < 2:
        return False
    return any(inp.startswith(quote) and inp.endswith(quote)
               for quote in ("'", '"'))

206 

207 

def find_delims(s, delim='"', match=None):
    """Find matching delimiters (quotes, braces, etc) in a string.

    Returns
    -------
       True, index1, index2   if a match is found
       False, index1, len(s)  if a match is not found

    index1 is the position of the first `delim` (-1 when `delim` does
    not occur at all) and index2 is the position of the last character
    of the matching delimiter.

    The opening delimiter is set with the keyword arg `delim` and the
    closing delimiter with the keyword arg `match`; if `match` is None
    (default), it is set to `delim`.

    A closing delimiter preceded by a single backslash is treated as
    escaped and skipped; one preceded by two backslashes is accepted.

    >>> find_delims(mystr, delim=":")
    >>> find_delims(mystr, delim='<', match='>')
    """
    backslash = "\\"
    closer = delim if match is None else match
    start = s.find(delim)
    if start < 0:
        return False, start, len(s)

    width = len(closer)
    for pos in range(start + 1, len(s)):
        if s[pos:pos + width] != closer:
            continue
        before1 = s[pos - 1:pos]
        # two characters of look-behind are only available from pos 2 on
        before2 = s[pos - 2:pos] if pos > 1 else None
        escaped = before1 == backslash and before2 != backslash * 2
        if not escaped:
            return True, start, pos + width - 1
    return False, start, len(s)

235 

def version_ge(v1, v2):
    """Return whether version string `v1` >= version string `v2`.

    Both strings are parsed with pkg_version.parse before comparing.
    """
    first = pkg_version.parse(v1)
    second = pkg_version.parse(v2)
    return first >= second

239 

def b32hash(s):
    """Return the base32-encoded SHA-256 hash of a string.

    The input is converted with str2bytes() before hashing and the
    encoded digest is returned as a str via bytes2str().
    """
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b32encode(digest))

245 

def b64hash(s):
    """Return the base64-encoded SHA-256 hash of a string.

    The input is converted with str2bytes() before hashing and the
    encoded digest is returned as a str via bytes2str().
    """
    digest = hashlib.sha256(str2bytes(s)).digest()
    return bytes2str(b64encode(digest))

251 

def get_sessionid():
    """Return an 8 character string derived from the machine and process.

    The value of uuid.getnode() and the process id are hashed with
    SHA-256; characters 3 to 10 of the base64-encoded digest are
    returned, with '/' replaced by '-' and '+' replaced by '='.
    """
    seed = f"{uuid.getnode():d} {os.getpid():d}".encode('ASCII')
    encoded = b64encode(hashlib.sha256(seed).digest()).decode('ASCII')
    return encoded[3:11].replace('/', '-').replace('+', '=')

258 

259 

def random_varname(n):
    """Return a random name: one lowercase letter followed by n-1
    characters drawn from lowercase letters and digits.

    At least one character is always returned, even for n < 1.
    """
    alphabet = 'abcdefghijklmnopqrstuvwxyz0123456789'
    chars = [random.choice(alphabet[:26])]
    for _ in range(n - 1):
        chars.append(random.choice(alphabet))
    return ''.join(chars)

263 

264 

def file2groupname(filename, slen=9, minlen=2, symtable=None):
    """Create a group name based on a filename.

    The name is built from the lower-cased result of fix_varname() with
    underscores removed, starts with a lowercase letter, and is cut to
    at most `slen` characters. When `symtable` is given and already has
    an attribute of that name, a 2 digit number (and, after many
    collisions, extra random characters) is appended until the name is
    unused.

    Arguments
    ---------
    filename  (str) filename to use
    slen      (int) maximum length of string portion (default 9)
    minlen    (int) minimum length of string portion; shorter names
              are padded with random characters (default 2)
    symtable  (None or larch symbol table) symbol table for
              checking that the group name is unique
    """
    gname = fix_varname(filename).lower().replace('_', '')

    # gname is empty when only underscores were left to remove; indexing
    # it would raise IndexError, so an empty name also gets a random
    # leading letter.
    if not gname or gname[0] not in 'abcdefghijklmnopqrstuvwxyz':
        gname = random.choice(['a', 'b', 'c', 'd', 'e', 'f', 'g']) + gname
    if len(gname) < minlen:
        gname = gname + random_varname(minlen-len(gname))

    gname = gname[:slen]
    if symtable is None:
        return gname

    gbase = gname
    scount, count, n = 0, 0, 2
    while hasattr(symtable, gname):
        count += 1
        if count == 100:
            # numeric suffixes 01..99 are used up: restart the counter
            # on a new base extended with random characters
            count = 1
            scount += 1
            if scount > 200:
                scount = 0
                n = n + 1
            gbase = gname + random_varname(n)
        gname = f"{gbase}{count:02d}"
    return gname

302 

303 

def break_longstring(s, maxlen=90, n1=20):
    """Break a long string into a list of smaller strings,
    broken at comma, space, tab, period, or slash.

    Arguments
    ---------
    s       string to break
    maxlen  maximum length of each piece (default 90)
    n1      width of the window before `maxlen` that is searched for a
            break character (default 20)

    Returns
    -------
    A list of strings, even if of length 1. Joining the pieces gives
    back `s`, and no piece is longer than `maxlen`.

    Each piece ends just after the first break character found between
    positions maxlen-n1 and maxlen, trying comma, space, tab, period and
    slash in that order. If the window holds none of them, the piece is
    cut at exactly `maxlen` characters.
    """
    maxlen = max(1, maxlen)   # a cut of 0 characters would never finish
    if len(s) <= maxlen:
        return [s]

    minlen = max(0, maxlen - n1)
    out = []
    while len(s) > maxlen:
        # Only look inside the window: a separator beyond maxlen would
        # make the piece too long.
        window = s[minlen:maxlen]
        cut = maxlen
        for sep in (',', ' ', '\t', '.', '/'):
            idx = window.find(sep)
            if idx >= 0:
                cut = minlen + idx + 1
                break
        out.append(s[:cut])
        s = s[cut:]
    out.append(s)
    return out