Coverage for /Users/Newville/Codes/xraylarch/larch/io/xafs_beamlines.py: 93%

343 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2""" 

3classes for handling XAFS data in plaintext column files for various beamlines. 

4 

5 

6Basically, a class for XAFS Beamline data. This defines 

7 a) how to name the arrays for columns in the data file 

8 b) which column is most likely to hold the energy (or energy-defining) array 

9 c) what the energy units are most likely to be. 

10 

11Specific beamline data should define a class that derives from GenericBeamlineData 

12and has the following attributes/methods: 

13 

14 

15 energy_column : int index for default energy column 

16 

17 energy_units : str ('eV', 'keV', 'deg') for default expected energy units 

18 

19 beamline_matches(): method to decide whether data may be from the beamline 

20 should give more false positives than false negatives. 

21 

22 get_array_labels(): method to guess array labels. 

23 

24The XXX_BeamlineData class will be given *only* the headerlines (a list of lines) 

25from the text file. 

26 

27By default, that header is defined as all the text before the data table. 

28 

29""" 

30 

31import re 

32 

33import numpy as np 

34from .fileutils import fix_varname 

35 

def guess_beamline(header=None):
    """
    guess beamline data class used to parse headers from header lines

    Arguments
    ---------
    header : list of str or None
        header lines of the data file (the text before the data table).

    Returns
    -------
    a beamline data *class* (not an instance).  GenericBeamlineData is
    returned when the header has fewer than two lines or when no more
    specific beamline is recognized.
    """
    if header is None:
        header = ['']
    if len(header) < 2:
        return GenericBeamlineData

    # all tests are done on lower-cased text
    line1 = header[0].lower()
    full = '\n'.join(header).lower()

    if line1.startswith('#'):
        line1 = line1.replace('#', '')

    if 'xdi/1' in line1 and 'epics stepscan' in line1:
        return APSGSE_BeamlineData
    if line1.startswith('; epics scan 1 dim'):
        return APSGSE_BeamlineData
    if 'labview control panel' in line1:
        return APSXSD_BeamlineData
    if 'mrcat_xafs' in line1:
        return APSMRCAT_BeamlineData
    # removing a leading '#' usually leaves a blank before the first word
    # ('# XDAC ...' -> ' xdac ...'), so strip before testing the prefix,
    # as NSLSXDAC_BeamlineData.beamline_matches() does.
    if line1.strip().startswith('xdac'):
        return NSLSXDAC_BeamlineData
    if 'ssrl' in line1 and 'exafs data collector' in line1:
        return SSRL_BeamlineData
    if 'cls data acquisition' in line1:
        return CLSHXMA_BeamlineData
    if 'kek-pf' in line1:
        return KEKPF_BeamlineData
    if 'exafsscan' in full and 'exafs_region' in full:
        return APS12BM_BeamlineData
    return GenericBeamlineData

68 

69 

class GenericBeamlineData:
    """
    Generic beamline data file - use as last resort

    Labels are taken from the last header line:
      1. leading '#L', '#C', '#', and 'C' markers are removed, as written
         by Spec and many other collection systems.
      2. the characters ',#@%&', tabs, and quotes are turned into blanks.
      3. the line is split on whitespace and each word is fixed so that
         it is a valid variable name.
    """
    energy_column = 1
    energy_units = 'eV'
    mono_dspace = -1
    name = 'generic'

    def __init__(self, headerlines=None):
        """store a private copy of the header lines ([''] if None)"""
        self.headerlines = [''] if headerlines is None else list(headerlines)

    def beamline_matches(self):
        """any header with more than one line is accepted"""
        return len(self.headerlines) > 1

    def get_array_labels(self, ncolumns=None):
        """guess array labels from the last header line"""
        if self.headerlines:
            text = self.headerlines[-1].strip()
        else:
            text = "# "
        # each marker is tested in turn against what remains of the line
        for marker in ('#L', '#C', '#', 'C'):
            if text.startswith(marker):
                text = text[len(marker):]
        blanked = str.maketrans(dict.fromkeys('\t,#@%&"\'', ' '))
        words = text.translate(blanked).split()
        return self._set_labels(words, ncolumns=ncolumns)

    def _set_labels(self, inlabels, ncolumns=None):
        """
        final parsing, cleaning, ensuring number of columns is satisfied

        The resulting list is saved as self.labels and returned.
        """
        labels = []
        for colnum, raw in enumerate(inlabels, start=1):
            word = raw.strip().lower()
            labels.append(fix_varname(word) if len(word) > 0
                          else 'col%d' % colnum)

        # a label repeating an earlier one gets a column-number suffix
        for idx, lab in enumerate(labels):
            if lab in labels[:idx]:
                labels[idx] = lab + '_col%d' % (idx+1)

        # pad with 'colN' names up to the requested number of columns
        if ncolumns is not None:
            labels.extend('col%d' % colnum
                          for colnum in range(len(labels)+1, ncolumns+1))
        self.labels = labels
        return labels

127 

128 

class APSGSE_BeamlineData(GenericBeamlineData):
    """
    GSECARS EpicsScan data, APS 13ID, some NSLS-II XFM 4BM data
    """
    name = 'GSE EpicsScan'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first line is either an XDI 'Epics StepScan' line or an
        old-style '; Epics Scan 1 dim' line"""
        line1 = ''
        if len(self.headerlines) > 0:
            line1 = self.headerlines[0].lower()
        return (('xdi/1' in line1 and 'epics stepscan' in line1) or
                line1.startswith('; epics scan 1 dim'))

    def get_array_labels(self, ncolumns=None):
        """
        read array labels from the header, for either file style.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        line1 = self.headerlines[0].lower()
        if line1.startswith('; epics scan 1 dim'):
            labels = self._oldstyle_labels()
        else:
            labels = self._xdi_labels()
        return self._set_labels(labels, ncolumns=ncolumns)

    def _oldstyle_labels(self):
        """labels from the 'column labels:' legend of old-style files,
        with entries that look like 'P1 = {label} --> pvname'"""
        labels = []
        in_legend = False
        for line in self.headerlines:
            line = line[1:].strip()  # drop the leading comment character
            if in_legend:
                if len(line) < 2 or '-->' not in line:
                    break  # end of the legend
                pref = line.split('-->', 1)[0]
                # take the text after the first '='; an entry with extra
                # (or no) '=' signs no longer fails on tuple unpacking
                arg = pref.split('=', 1)[-1]
                arg = arg.replace('{', '').replace('}', '')
                labels.append(arg.strip())
            elif 'column labels:' in line:
                in_legend = True
        return labels

    def _xdi_labels(self):
        """labels from 'Column.N: label [units] || pvname' lines of the
        leading '#' comment block; sets energy_units when an energy
        column gives units"""
        labels = []
        for line in self.headerlines:
            if not line.startswith('#'):
                break
            line = line[1:].strip()
            if line.lower().startswith('column.') and '||' in line:
                label = line.split('||', 1)[0]
                # text after the first ':'; this is empty if there is no
                # ':', and _set_labels() then names the column 'colN'
                entry = label.partition(':')[2].strip()
                if ' ' in entry:
                    words = [a.strip() for a in entry.split()]
                    if len(words) > 1:
                        entry, units = words[0], words[1]
                        if 'energy' in entry.lower() and len(units) > 1:
                            self.energy_units = units
                labels.append(entry)
        return labels

189 

190 

class APS12BM_BeamlineData(GenericBeamlineData):
    """
    APS sector 12BM data
    """
    name = 'APS 12BM'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """ must see 'exafs_region', and every header line must be a
        '#' comment line """
        found = False
        for line in self.headerlines:
            if not line.startswith('#'):
                return False
            # case-insensitive, as in guess_beamline()
            if 'exafs_region' in line.lower():
                found = True
        return found

    def get_array_labels(self, ncolumns=None):
        """
        read array labels from the last header line, which holds words
        of the form '<int>_<label>'.

        A word without '_' that follows the first label is taken as the
        energy units, with any parentheses removed.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        labelline = self.headerlines[-1].replace('#C', ' ').strip()

        labels = []
        for word in labelline.split():
            if '_' in word:
                # split on the first '_' only, so that a label which
                # itself contains '_' ('5_dead_time') is kept whole
                # instead of failing on tuple unpacking
                pref, suff = word.split('_', 1)
                try:
                    int(pref)
                except ValueError:
                    continue
                labels.append(suff)
            elif len(labels) == 1:
                self.energy_units = word.replace('(', '').replace(')', '')
        return self._set_labels(labels, ncolumns=ncolumns)

235 

236 

class APSMRCAT_BeamlineData(GenericBeamlineData):
    """
    APS sector 10ID or 10BM data
    """
    name = 'APS MRCAT'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line must contain 'MRCAT_XAFS' (any case)"""
        line1 = self.headerlines[0] if self.headerlines else ''
        # case-insensitive so that a header selected by guess_beamline(),
        # which matches on lower-cased text, is also accepted here
        return 'mrcat_xafs' in line1.lower()

    def get_array_labels(self, ncolumns=None):
        """
        read array labels from the line following the first '-------'
        separator line.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        labels = []
        lines = iter(self.headerlines)
        for line in lines:
            if '-------' in line:
                # the labels are on the next line, if there is one
                labels = next(lines, '').strip().split()
                break

        return self._set_labels(labels, ncolumns=ncolumns)

267 

268 

class APSXSD_BeamlineData(GenericBeamlineData):
    """
    APS sector 20ID, 20BM, 9BM
    """
    name = 'APS XSD'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line must contain 'LabVIEW Control Panel'
        (any case)"""
        line1 = self.headerlines[0] if self.headerlines else ''
        # case-insensitive so that a header selected by guess_beamline(),
        # which matches on lower-cased text, is also accepted here
        return 'labview control panel' in line1.lower()

    def get_array_labels(self, ncolumns=None):
        """
        read array labels, trying the explicit legend of "newer" files
        first and then the last header line of "older" files.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        # here we try two different ways for "older" and "newer" 20BM/9BM files
        labels = self._legend_labels()

        # older version: no explicit legend, parse last header line, uses '*'
        if len(labels) == 0:
            labelline = self.headerlines[-1].replace('#', '')
            words = labelline.split('*')
            if len(words) > 1:
                lastword = words.pop()
                words.extend(lastword.split())
            labels = words

        return self._set_labels(labels, ncolumns=ncolumns)

    def _legend_labels(self):
        """labels from the 'N) label' entries that follow the line with
        'is a readable list of column'; [] if fewer than 2 are found"""
        tmplabels = {}
        maxkey = -1
        in_legend = False
        for line in self.headerlines:
            line = line[1:].strip()  # drop the leading comment character
            if not in_legend:
                if 'is a readable list of column' in line:
                    in_legend = True
                continue
            if len(line) < 2:
                break  # a (nearly) empty line ends the legend
            if ')' not in line:
                continue
            if line.startswith('#'):
                line = line[1:].strip()

            # positions of each ')', with the end of line as a final stop
            pars = [k for k, char in enumerate(line) if char == ')']
            pars.append(len(line))
            for k in range(len(pars)-1):
                j = pars[k]
                key = line[max(0, j-2):j]  # up to 2 characters before ')'
                z = pars[k+1]
                if z < len(line)-3:
                    # back up over the digits of the next entry's number,
                    # so that they are not included in this label
                    for o in range(1, 4):
                        try:
                            int(line[z-o])
                        except ValueError:
                            break
                    z = z-o+1
                val = line[j+1:z].strip()
                if val.endswith('*'):
                    val = val[:-1].strip()

                try:
                    key = int(key)
                except ValueError:
                    break  # not a numbered entry: skip the rest of the line
                maxkey = max(maxkey, key)
                tmplabels[key] = val

        if len(tmplabels) <= 1:
            return []
        # place labels by their number, then drop the unused slots
        maxkey = max(maxkey, len(tmplabels))
        labels = [''] * (maxkey+5)
        for k, v in tmplabels.items():
            labels[k] = v
        return [o for o in labels if len(o) > 0]

352 

353 

class NSLSXDAC_BeamlineData(GenericBeamlineData):
    """
    NSLS (I) XDAC collected data
    """
    name = 'NSLS XDAC'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line, without '#' and surrounding whitespace,
        must start with 'XDAC' (any case)"""
        line1 = ''
        if len(self.headerlines) > 0:
            line1 = self.headerlines[0].replace('#', '').strip()
        # case-insensitive so that a header selected by guess_beamline(),
        # which matches on lower-cased text, is also accepted here
        return line1.lower().startswith('xdac')

    def get_array_labels(self, ncolumns=None):
        """
        read array labels from the line following the first '-------'
        separator line.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        labels = []
        lines = iter(self.headerlines)
        for line in lines:
            if '-------' in line:
                # the labels are on the next line, if there is one
                labels = next(lines, '').strip().split()
                break

        return self._set_labels(labels, ncolumns=ncolumns)

384 

385 

class SSRL_BeamlineData(GenericBeamlineData):
    """
    SSRL EXAFS Data Collect beamline data
    """
    name = 'SSRL'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line must mention both 'ssrl' and
        'exafs data collector' (any case)"""
        first = self.headerlines[0].lower() if self.headerlines else ''
        return ('ssrl' in first and 'exafs data collector' in first)

    def get_array_labels(self, ncolumns=None):
        """
        read array labels, one per line, from the lines that follow the
        'Data:' line, up to the first line shorter than 2 characters.

        energy_column is set to the (1-based) position of the last label
        containing 'energy'.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        labels = []
        in_legend = False
        for raw in self.headerlines:
            text = raw.strip()
            if not in_legend:
                in_legend = (text == 'Data:')
                continue
            if len(text) < 2:
                break
            labels.append(text)
            if 'energy' in text.lower():
                self.energy_column = len(labels)

        return self._set_labels(labels, ncolumns=ncolumns)

422 

423 

class CLSHXMA_BeamlineData(GenericBeamlineData):
    """
    CLS HXMA beamline data
    """
    name = 'CLS HXMA'
    energy_column = 1

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line must contain 'cls data acquisition'
        (any case)"""
        first = self.headerlines[0] if self.headerlines else ''
        return ('cls data acquisition' in first.lower())

    def get_array_labels(self, ncolumns=None):
        """
        read array labels from the header line that starts with '#(1)'
        and contains '$(' (the last such line, if there are several).

        energy_column is set to the (1-based) position of the last label
        containing 'energy'.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        blanked = str.maketrans(dict.fromkeys('"#$()\t', ' '))
        words = []
        for raw in self.headerlines:
            text = raw.strip()
            if text.startswith('#(1)') and '$(' in text:
                words = text.replace('#(1)', '').translate(blanked).split()

        labels = [fix_varname(word.strip().lower()) for word in words]
        for colnum, label in enumerate(labels, start=1):
            if 'energy' in label:
                self.energy_column = colnum
        return self._set_labels(labels, ncolumns=ncolumns)

458 

459 

class KEKPF_BeamlineData(GenericBeamlineData):
    """
    KEK-PF (Photon Factory Data), as from BL12C
    """
    name = 'KEK PF'
    energy_column = 2
    energy_units = 'deg'

    def __init__(self, headerlines=None):
        GenericBeamlineData.__init__(self, headerlines=headerlines)

    def beamline_matches(self):
        """first header line must contain 'KEK-PF' (any case)"""
        line1 = ''
        if len(self.headerlines) > 0:
            line1 = self.headerlines[0].replace('#', '').strip()
        # case-insensitive so that a header selected by guess_beamline(),
        # which matches on lower-cased text, is also accepted here
        return 'kek-pf' in line1.lower()

    def get_array_labels(self, ncolumns=None):
        """
        set the fixed labels 'angle_drive', 'angle_read', 'time', padded
        with 'colN' names to the larger of `ncolumns` and the number of
        words in the last header line.

        mono_dspace is set from the 'd = <value>' entry of the 'mono :'
        header line, when that value parses as a float.

        Raises ValueError if the header is not from this beamline.
        """
        if not self.beamline_matches():
            raise ValueError('header is not from beamline %s' % self.name)

        for line in self.headerlines:
            line = line.lower().replace('#', ' ').strip()
            if 'mono :' in line:
                words = line.replace('=', ' ').split()
                # the word that follows a bare 'd' is the d-spacing
                for prev, word in zip(words, words[1:]):
                    if prev == 'd':
                        try:
                            self.mono_dspace = float(word)
                        except ValueError:
                            pass

        ncols = len(self.headerlines[-1].strip().split())
        if ncolumns is not None:
            ncols = max(ncols, ncolumns)

        labels = ['angle_drive', 'angle_read', 'time']
        return self._set_labels(labels, ncolumns=ncols)