Coverage for /Users/Newville/Codes/xraylarch/larch/xrd/amcsd.py: 10%

864 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2""" 

3AMCIFDB: American Mineralogical CIF database as sqlite3 database/python 

4 

5Usage: 

6 amcifdb = AMCIFDB('amcif.db') 

7 

8add a CIF file: 

9 amcifdb.add_ciffile('NewFile.cif') 

10 

11generate the text of a CIF file from index:

12 cif_text = amcifdb.get_ciftext(300) 

13 

14OK, that looks like 'well, why not just save the CIF files'? 

15 

16And the answers are that there are simple methods for: 

17 a) getting the XRD Q points 

18 b) getting structure factors 

19 c) getting atomic clusters as for feff files

20 d) saving Feff.inp files 

21 

22 

23""" 

24 

25import sys 

26import os 

27import re 

28import time 

29import json 

30from io import StringIO 

31from string import ascii_letters 

32from base64 import b64encode, b64decode 

33from collections import namedtuple 

34from gzip import GzipFile 

35import requests 

36from requests.packages.urllib3.exceptions import InsecureRequestWarning 

37import atexit 

38import numpy as np 

39 

40from sqlalchemy import MetaData, create_engine, func, text, and_, Table 

41from sqlalchemy import __version__ as sqla_version 

42from sqlalchemy.sql import select as sqla_select 

43from sqlalchemy.orm import sessionmaker 

44 

45 

46from .amcsd_utils import (make_engine, isAMCSD, put_optarray, get_optarray, 

47 PMG_CIF_OPTS, CifParser, SpacegroupAnalyzer) 

48 

49from xraydb.chemparser import chemparse 

50from xraydb import f0, f1_chantler, f2_chantler 

51 

52 

53from .xrd_tools import generate_hkl, d_from_hkl, twth_from_q, E_from_lambda 

54from .cif2feff import cif2feffinp 

55from ..utils import isotime, mkdir 

56from ..utils.strutils import version_ge, bytes2str 

57from ..utils.physical_constants import TAU, ATOM_SYMS 

58from ..site_config import user_larchdir 

59from .. import logger 

60 

# NOTE(review): _CIFDB is not referenced in the visible part of this file;
# presumably a module-level cache for a shared database instance -- confirm.
_CIFDB = None
# table of candidate HKLs, built once on first use by CifStructure.find_hkls()
ALL_HKLS = None
# database file name used by AMCSD() when no dbname is given
# (looked up in the folder containing this module)
AMCSD_TRIM = 'amcsd_cif1.db'
# NOTE(review): not used in the visible code; presumably the file name of the
# full (untrimmed) database -- confirm.
AMCSD_FULL = 'amcsd_cif2.db'

# NOTE(review): not used in the visible code; presumably the locations the
# database files can be downloaded from -- confirm.
SOURCE_URLS = ('https://docs.xrayabsorption.org/databases/',
               'https://millenia.cars.aps.anl.gov/xraylarch/downloads/')

# columns expected in the 'cif' table; AMCSD.__init__() adds any that are
# missing as text columns when the database is not opened read-only
CIF_TEXTCOLUMNS = ('formula', 'compound', 'pub_title', 'formula_title', 'a',
                   'b', 'c', 'alpha', 'beta', 'gamma', 'cell_volume',
                   'crystal_density', 'atoms_sites', 'atoms_x', 'atoms_y',
                   'atoms_z', 'atoms_occupancy', 'atoms_u_iso',
                   'atoms_aniso_label', 'atoms_aniso_u11', 'atoms_aniso_u22',
                   'atoms_aniso_u33', 'atoms_aniso_u12', 'atoms_aniso_u13',
                   'atoms_aniso_u23', 'qdat','url', 'hkls')



# one publication record, as built by AMCSD.get_publications();
# 'authors' is a tuple of author names
CifPublication = namedtuple('CifPublication', ('id', 'journalname', 'year',
                                               'volume', 'page_first',
                                               'page_last', 'authors'))


# result of CifStructure.get_structure_factors(): per-reflection arrays
# (q, intensity, hkl, twotheta, d, f2hkl, degen, lorentz) plus the
# wavelength and energy they were calculated for
StructureFactor = namedtuple('StructureFactor', ('q', 'intensity', 'hkl',
                                                 'twotheta', 'd',
                                                 'wavelength', 'energy',
                                                 'f2hkl', 'degen', 'lorentz'))

88 

89 

# 64-symbol alphabet used to pack/unpack H, K, L into a 2-character hash
HKL_ENCODE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_%'


def pack_hkl(h, k, l):
    """Encode H, K, L as a 2-character printable string.

    The three indices are combined into one 12-bit number (4 bits each)
    which is then written as two base-64 digits taken from HKL_ENCODE.

    Arguments:
        h, k, l: unsigned integers, each in the range 0 to 15

    Returns:
        2-character string; unpack_hkl() reverses the encoding.

    Raises:
        ValueError: if any index is negative or larger than 15
    """
    if any(index > 15 or index < 0 for index in (h, k, l)):
        raise ValueError("hkl values out of range (max=15)")
    code = (h*16 + k)*16 + l
    high, low = divmod(code, 64)
    return HKL_ENCODE[high] + HKL_ENCODE[low]

105 

106 

def unpack_hkl(hash):
    """Decode a 2-character string made by pack_hkl() into (H, K, L).

    The two characters are looked up in HKL_ENCODE and read as the two
    base-64 digits of a 12-bit number holding 4 bits per index.

    Raises:
        ValueError: if a character is not part of HKL_ENCODE
    """
    code = 64*HKL_ENCODE.index(hash[0]) + HKL_ENCODE.index(hash[1])
    hk, l_index = divmod(code, 16)
    h_index, k_index = divmod(hk, 16)
    return h_index, k_index, l_index

113 

114 

def pack_hkl_degen(hkls, degen):
    """Pack H, K, L triples and their degeneracies into a printable string.

    The result has the form "<packed hkls>|<json list of degeneracies>",
    with two characters per (H, K, L) triple as produced by pack_hkl().

    Arguments:
        hkls:  array, or list of lists/tuples, of H, K, L values, each an
               unsigned integer from 0 to 15
        degen: ndarray or list of degeneracies, same length as hkls

    Returns:
        string for storage and transmission; see unpack_hkl_degen() to
        reverse the process.

    Raises:
        ValueError: if hkls and degen differ in length, or an H, K, L
                    value is out of range
    """
    if len(hkls) != len(degen):
        raise ValueError("hkls and degen must be the same length in pack_hkl_degen()")

    shkl = ''.join(pack_hkl(h, k, l) for h, k, l in hkls)
    # np.asarray() lets plain lists through as well as ndarrays, and
    # tolist() converts numpy scalars to builtin numbers that json accepts
    sdegen = json.dumps(np.asarray(degen).tolist(), separators=(',', ':'))
    return f"{shkl}|{sdegen}"

130 

131 

def unpack_hkl_degen(sinp):
    """Unpack the string written by pack_hkl_degen().

    The part before '|' is read two characters at a time with
    unpack_hkl(); the part after it is a JSON list of degeneracies.

    Returns:
        (hkls, degen) as two numpy arrays

    see also pack_hkl_degen()
    """
    packed, degen_json = sinp.split('|')
    npairs = len(packed)//2
    hkls = [unpack_hkl(packed[start:start+2])
            for start in range(0, 2*npairs, 2)]
    degen = json.loads(degen_json)
    return np.array(hkls), np.array(degen)

143 

144 

145 

def select(*args):
    """Version-independent wrapper around sqlalchemy's select().

    For sqlalchemy 1.4.0 and later the arguments are passed on
    positionally; older versions get them as a single tuple.
    """
    if not version_ge(sqla_version, '1.4.0'):
        return sqla_select(tuple(args))
    return sqla_select(*args)

154 

155 

def get_nonzero(thing):
    """Return `thing`, or None if it is a single near-zero value.

    A length-1 sequence whose only element has magnitude below 1.e-5 is
    treated as "no data" and replaced by None.  Anything else, including
    objects without a length or with non-numeric content, is returned
    unchanged.
    """
    try:
        if len(thing) == 1 and abs(thing[0]) < 1.e-5:
            return None
    except Exception:
        # not a sized/indexable numeric sequence: pass it through as-is.
        # KeyboardInterrupt and SystemExit are deliberately not caught.
        pass
    return thing

163 

def clean_elemsym(sym):
    """Return the ASCII letters found in the first two characters of `sym`.

    For example a site label such as 'Fe2+' gives 'Fe' and 'O1' gives 'O'.
    """
    leading = (sym + ' ')[:2]
    letters = [char for char in leading if char in ascii_letters]
    return ''.join(letters)

167 

168 

def parse_cif_file(filename):
    """Parse a CIF file and extract the data of its first listed structure.

    The chemical formula is taken from '_chemical_formula_sum' or
    '_chemical_formula_moiety' (the last one that chemparse() accepts
    wins); if neither is usable it is assembled from the
    '_atom_site_type_symbol' column.  Symmetry operations are read from
    '_space_group_symop_operation_xyz' or, failing that, from
    '_symmetry_equiv_pos_as_xyz'.

    Arguments:
        filename: name of the CIF file, passed on to pymatgen's CifParser

    Returns:
        (dat, formula, symm_xyz): the raw CIF data dictionary, the formula
        string, and the symmetry operations as a JSON-encoded string

    Raises:
        ValueError: if pymatgen is not available, or if no formula or no
                    symmetry operations can be read from the file
    """
    if CifParser is None:
        raise ValueError("CifParser from pymatgen not available. Try 'pip install pymatgen'.")

    cif = CifParser(filename, **PMG_CIF_OPTS)
    cifkey = list(cif._cif.data.keys())[0]
    dat = cif._cif.data[cifkey].data

    formula = None
    for formname in ('_chemical_formula_sum', '_chemical_formula_moiety'):
        if formname in dat:
            try:
                # parsing is only a validity check; the raw text is kept
                chemparse(dat[formname])
            except Exception:
                continue
            formula = dat[formname]

    if formula is None and '_atom_site_type_symbol' in dat:
        # fall back to counting the element symbols of the atom sites
        comps = {}
        complist = dat['_atom_site_type_symbol']
        for c in complist:
            if c not in comps:
                nx = complist.count(c)
                comps[c] = '%s%d' % (c, nx) if nx != 1 else c
        formula = ''.join(comps.values())

    if formula is None:
        raise ValueError(f'Cannot read chemical formula from file {filename}')

    # symmetry operations, preferring the newer CIF tag
    symm_xyz = None
    for symkey in ('_space_group_symop_operation_xyz',
                   '_symmetry_equiv_pos_as_xyz'):
        symm_xyz = dat.get(symkey, None)
        if symm_xyz is not None:
            break
    if symm_xyz is None:
        raise ValueError(f'Cannot read symmetries from file {filename}')

    return dat, formula, json.dumps(symm_xyz)

218 

219 

class CifStructure():
    """Representation of a single CIF structure.

    Holds the metadata, unit cell and atomic-site columns of one CIF
    entry and provides helpers to regenerate the CIF text, to calculate
    structure factors for X-ray diffraction, and to write Feff input
    files.  Crystallographic calculations use pymatgen structures that
    are built on demand from the generated CIF text (get_pmg_struct()).
    """

    def __init__(self, ams_id=None, ams_db=None, publication=None, mineral=None,
                 spacegroup=None, hm_symbol=None, formula_title=None,
                 compound=None, formula=None, pub_title=None, a=None, b=None,
                 c=None, alpha=None, beta=None, gamma=None, hkls=None,
                 cell_volume=None, crystal_density=None,
                 atoms_sites='<missing>', atoms_aniso_label='<missing>',
                 atoms_x=None, atoms_y=None, atoms_z=None,
                 atoms_occupancy=None, atoms_u_iso=None, atoms_aniso_u11=None,
                 atoms_aniso_u22=None, atoms_aniso_u33=None,
                 atoms_aniso_u12=None, atoms_aniso_u13=None,
                 atoms_aniso_u23=None):
        """Store the supplied values as attributes.

        Occupancies, U_iso and the anisotropic U columns are passed
        through get_nonzero(), so a single near-zero value is stored as
        None.  `hkls` is the packed string read by unpack_hkl_degen().
        """
        self.ams_id = ams_id
        self.ams_db = ams_db
        self.publication = publication
        self.mineral = mineral
        self.spacegroup = spacegroup
        self.hm_symbol = hm_symbol
        self.formula_title = formula_title
        self.compound = compound
        self.formula = formula
        self.pub_title = pub_title
        self.a = a
        self.b = b
        self.c = c
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.hkls = hkls
        self.cell_volume = cell_volume
        self.crystal_density = crystal_density
        self.atoms_sites = atoms_sites
        self.atoms_aniso_label = atoms_aniso_label
        self.atoms_x = atoms_x
        self.atoms_y = atoms_y
        self.atoms_z = atoms_z
        self.atoms_occupancy = get_nonzero(atoms_occupancy)
        self.atoms_u_iso = get_nonzero(atoms_u_iso)
        self.atoms_aniso_u11 = get_nonzero(atoms_aniso_u11)
        self.atoms_aniso_u22 = get_nonzero(atoms_aniso_u22)
        self.atoms_aniso_u33 = get_nonzero(atoms_aniso_u33)
        self.atoms_aniso_u12 = get_nonzero(atoms_aniso_u12)
        self.atoms_aniso_u13 = get_nonzero(atoms_aniso_u13)
        self.atoms_aniso_u23 = get_nonzero(atoms_aniso_u23)
        self.natoms = 0
        self._ciftext = None     # cache for the `ciftext` property
        self.pmg_pstruct = None  # conventional standard structure (pymatgen)
        self.pmg_cstruct = None  # structure as parsed from the CIF text (pymatgen)
        if atoms_sites not in (None, '<missing>'):
            self.natoms = len(atoms_sites)

    def __repr__(self):
        if self.ams_id is None or self.formula is None:
            return '<CifStructure empty>'
        return f'<CifStructure, ams_id={self.ams_id:d}, formula={self.formula:s}>'

    def get_mineralname(self):
        """Return the mineral name, falling back to the formula title and
        then to 'missing' when those are recorded as '<missing>'."""
        minname = self.mineral.name
        if minname == '<missing>':
            minname = self.formula_title
        if minname == '<missing>':
            minname = 'missing'
        return minname

    @property
    def ciftext(self):
        """Text of a CIF file for this structure, generated once and cached."""
        if self._ciftext is not None:
            return self._ciftext

        out = ['data_global']
        if self.formula_title != '<missing>':
            out.append(f"_amcsd_formula_title '{self.formula_title:s}'")

        if self.mineral.name != '<missing>':
            out.append(f"_chemical_name_mineral '{self.mineral.name:s}'")
        out.append('loop_')
        out.append('_publ_author_name')
        for a in self.publication.authors:
            out.append(f"'{a:s}'")

        out.append(f"_journal_name_full '{self.publication.journalname}'")
        out.append(f"_journal_volume {self.publication.volume}")
        out.append(f"_journal_year {self.publication.year}")
        out.append(f"_journal_page_first {self.publication.page_first}")
        out.append(f"_journal_page_last {self.publication.page_last}")
        out.append('_publ_section_title')
        out.append(';')
        out.append(f"{self.pub_title:s}")
        out.append(';')
        out.append(f"_database_code_amcsd {self.ams_id:07d}")
        if self.compound != '<missing>':
            out.append(f"_chemical_compound_source '{self.compound}'")
        out.append(f"_chemical_formula_sum '{self.formula}'")
        out.append(f"_cell_length_a {self.a}")
        out.append(f"_cell_length_b {self.b}")
        out.append(f"_cell_length_c {self.c}")
        out.append(f"_cell_angle_alpha {self.alpha}")
        out.append(f"_cell_angle_beta {self.beta}")
        out.append(f"_cell_angle_gamma {self.gamma}")
        out.append(f"_cell_volume {self.cell_volume}")
        out.append(f"_exptl_crystal_density_diffrn {self.crystal_density}")
        out.append(f"_symmetry_space_group_name_H-M '{self.hm_symbol}'")
        out.append('loop_')
        out.append('_space_group_symop_operation_xyz')
        # symmetry operations are stored as a JSON-encoded list of strings
        for xyzop in json.loads(self.spacegroup.symmetry_xyz):
            out.append(f" '{xyzop:s}'")

        atoms_sites = self.atoms_sites
        if atoms_sites not in (None, 'None', '0', '<missing>'):
            out.append('loop_')
            out.append('_atom_site_label')
            out.append('_atom_site_fract_x')
            out.append('_atom_site_fract_y')
            out.append('_atom_site_fract_z')

            natoms = len(atoms_sites)
            atoms_x = self.atoms_x
            atoms_y = self.atoms_y
            atoms_z = self.atoms_z
            atoms_occ = self.atoms_occupancy
            atoms_u_iso = self.atoms_u_iso
            # optional columns are only declared (and written) when present
            if atoms_occ is not None:
                out.append('_atom_site_occupancy')
            if atoms_u_iso is not None:
                out.append('_atom_site_U_iso_or_equiv')
            for i in range(natoms):
                adat = f"{atoms_sites[i]} {atoms_x[i]} {atoms_y[i]} {atoms_z[i]}"
                if atoms_occ is not None:
                    adat += f" {atoms_occ[i]}"
                if atoms_u_iso is not None:
                    adat += f" {atoms_u_iso[i]}"
                out.append(adat)

            aniso_label = self.atoms_aniso_label
            if aniso_label not in (None, '0', '<missing>'):
                out.append('loop_')
                out.append('_atom_site_aniso_label')
                out.append('_atom_site_aniso_U_11')
                out.append('_atom_site_aniso_U_22')
                out.append('_atom_site_aniso_U_33')
                out.append('_atom_site_aniso_U_12')
                out.append('_atom_site_aniso_U_13')
                out.append('_atom_site_aniso_U_23')
                natoms = len(aniso_label)
                u11 = self.atoms_aniso_u11
                u22 = self.atoms_aniso_u22
                u33 = self.atoms_aniso_u33
                u12 = self.atoms_aniso_u12
                u13 = self.atoms_aniso_u13
                u23 = self.atoms_aniso_u23

                for i in range(natoms):
                    out.append(f"{aniso_label[i]} {u11[i]} {u22[i]} {u33[i]} {u12[i]} {u13[i]} {u23[i]}")

        out.append('')
        out.append('')
        self._ciftext = '\n'.join(out)
        return self._ciftext

    def find_hkls(self, nmax=64, qmax=10, wavelength=0.75):
        """find the HKLs and degeneracies of the strongest reflections

        this will calculate structure factors, and sort them, but the
        purpose is really to do a filter to find the strongest HKLs that
        can then be saved and restored for structure factor calcs using
        only the most important HKL values.

        Arguments:
            nmax:       maximum number of reflections to keep
            qmax:       reflections with q at or above this are dropped
            wavelength: wavelength used for the Lorentz/polarization
                        correction (resonant corrections are not applied)

        The packed result is also kept in `self.hkls`: as returned by
        ams_db.set_hkls() when a database is attached, otherwise as
        packed locally with pack_hkl_degen().

        returns hkls, degen of the nmax reflections with the highest
        scattered intensity, or None if pymatgen could not parse the
        structure.
        """
        self.get_pmg_struct()
        if self.pmg_pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return

        global ALL_HKLS
        if ALL_HKLS is None:
            ALL_HKLS = generate_hkl(hmax=15, kmax=15, lmax=15, positive_only=False)

        hkls = ALL_HKLS[:]
        unitcell = self.get_unitcell()
        qhkls = TAU / d_from_hkl(hkls, **unitcell)

        # remove q values outside of range
        qfilt = (qhkls < qmax)
        qhkls = qhkls[qfilt]
        hkls = hkls[qfilt]

        # find duplicate q-values, set degen
        # scale up q values to better find duplicates
        qscaled = [int(round(q*1.e9)) for q in qhkls]
        q_unique, q_degen, hkl_unique = [], [], []
        position = {}  # scaled q -> index in q_unique (avoids list searches)
        for i, q in enumerate(qscaled):
            index = position.get(q)
            if index is None:
                position[q] = len(q_unique)
                q_unique.append(q)
                q_degen.append(1)
                hkl_unique.append(hkls[i])
            else:
                q_degen[index] += 1

        qorder = np.argsort(q_unique)
        qhkls = 1.e-9*np.array(q_unique)[qorder]
        hkls = abs(np.array(hkl_unique)[qorder])
        degen = np.array(q_degen)[qorder]

        # note the f2 is calculated here without resonant corrections
        f2 = self.calculate_f2(hkls, qhkls=qhkls, wavelength=None)

        # filter out very small structure factors
        ffilt = (f2 > 1.e-6*max(f2))
        qhkls = qhkls[ffilt]
        hkls = hkls[ffilt]
        degen = degen[ffilt]
        f2 = f2[ffilt]

        # lorentz and polarization correction
        arad = (TAU/360)*twth_from_q(qhkls, wavelength)
        corr = (1+np.cos(arad)**2)/(np.sin(arad/2)**2*np.cos(arad/2))

        intensity = f2 * degen * corr
        ifilt = (intensity > 0.005*max(intensity))

        intensity = intensity[ifilt] / max(intensity)
        qhkls = qhkls[ifilt]
        hkls = hkls[ifilt]
        degen = degen[ifilt]

        # indices of peaks in descending order of intensity
        main_peaks = np.argsort(intensity)[::-1][:nmax]

        hkls_main, degen_main = hkls[main_peaks], degen[main_peaks]
        if self.ams_db is not None:
            self.hkls = self.ams_db.set_hkls(self.ams_id, hkls_main, degen_main)
        else:
            # no database to store into: keep the packed form on the
            # instance so get_structure_factors() can use it
            self.hkls = pack_hkl_degen(hkls_main, degen_main)

        return hkls_main, degen_main

    def get_structure_factors(self, wavelength=0.75):
        """calculate structure factors for the saved HKLs

        The HKLs and degeneracies are taken from `self.hkls`, calling
        find_hkls() first if that is not yet set.

        This is a lot like find_hkls(), but with the assumption that HKLs
        are not to be filtered or altered.

        Returns:
            StructureFactor namedtuple with arrays sorted by q, or None
            if the HKLs or the pymatgen structure are not available.
        """
        if self.hkls is None:
            self.find_hkls(nmax=64, qmax=10, wavelength=wavelength)
        if self.hkls is None:
            # HKLs could not be determined; nothing to calculate
            return None

        hkls, degen = unpack_hkl_degen(self.hkls)

        self.get_pmg_struct()
        if self.pmg_pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return None

        unitcell = self.get_unitcell()
        dhkls = d_from_hkl(hkls, **unitcell)
        qhkls = TAU / dhkls

        # sort by q
        qsort = np.argsort(qhkls)
        qhkls = qhkls[qsort]
        dhkls = dhkls[qsort]
        hkls = hkls[qsort]
        degen = degen[qsort]

        energy = E_from_lambda(wavelength, E_units='eV')

        f2hkl = self.calculate_f2(hkls, qhkls=qhkls, wavelength=wavelength)

        # lorentz and polarization correction
        twoth = twth_from_q(qhkls, wavelength)
        arad = (TAU/360)*twoth
        corr = (1+np.cos(arad)**2)/(np.sin(arad/2)**2*np.cos(arad/2))

        intensity = f2hkl * degen * corr

        return StructureFactor(q=qhkls, intensity=intensity, hkl=hkls, d=dhkls,
                               f2hkl=f2hkl, twotheta=twoth, degen=degen,
                               lorentz=corr, wavelength=wavelength,
                               energy=energy)

    def calculate_f2(self, hkls, qhkls=None, energy=None, wavelength=None):
        """calculate F*F'.

        Arguments:
            hkls:       array of H, K, L values, one row per reflection
            qhkls:      q value for each reflection; calculated from the
                        unit cell if None
            energy:     X-ray energy in eV, or None
            wavelength: X-ray wavelength in Ang, or None

        If wavelength (in Ang) or energy (in eV) is not None, then
        resonant corrections will be included.

        Returns:
            array of |F|^2, one value per reflection
        """
        if qhkls is None:
            unitcell = self.get_unitcell()
            qhkls = TAU / d_from_hkl(hkls, **unitcell)
        sq = qhkls/(2*TAU)
        sites = self.get_sites()

        if energy is None and wavelength is not None:
            energy = E_from_lambda(wavelength, E_units='eV')

        # get f0 and resonant scattering factors
        f0vals, f1vals, f2vals = {}, {}, {}
        for elem in sites:
            f0vals[elem] = f0(elem, sq)
            if energy is not None:
                f1vals[elem] = f1_chantler(elem, energy)
                f2vals[elem] = f2_chantler(elem, energy)

        # and f2: sum over all sites, weighted by occupancy and phase
        f2 = np.zeros(len(hkls))
        for i, hkl in enumerate(hkls):
            fsum = 0.
            for elem in f0vals:
                fval = f0vals[elem][i]
                if energy is not None:
                    fval += f1vals[elem] - 1j*f2vals[elem]
                for occu, fcoord in sites[elem]:
                    fsum += fval*occu*np.exp(1j*TAU*(fcoord*hkl).sum())
            f2[i] = (fsum*fsum.conjugate()).real
        return f2

    def get_pmg_struct(self):
        """Build and cache the pymatgen structures from the CIF text.

        Sets `pmg_cstruct` (first structure parsed from the CIF text) and
        `pmg_pstruct` (its conventional standard structure).  On failure
        a message is printed and the attributes are left as they were.
        """
        if self.pmg_cstruct is not None and self.pmg_pstruct is not None:
            return

        try:
            pmcif = CifParser(StringIO(self.ciftext), **PMG_CIF_OPTS)
            self.pmg_cstruct = pmcif.get_structures()[0]
            self.pmg_pstruct = SpacegroupAnalyzer(self.pmg_cstruct
                                                  ).get_conventional_standard_structure()
        except Exception:
            # best effort: callers check `pmg_pstruct is None`
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")

    def get_unitcell(self):
        "unitcell as dict, from PMG structure"
        self.get_pmg_struct()
        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return
        lattice = pstruct.as_dict()['lattice']
        return {key: lattice[key]
                for key in ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 'volume')}

    # element symbols replaced before looking up scattering factors.
    # NOTE(review): presumably these undo CIF site labels that pymatgen
    # reads as other elements -- confirm.
    _ELEMENT_ALIASES = {'Nh': 'N', 'Og': 'O', 'Hs': 'H', 'D': 'H', 'Fl': 'F'}

    def get_sites(self):
        """dictionary of sites, from PMG structure

        Returns:
            dict mapping element symbol to a list of
            (occupancy, fractional coordinates) tuples, or None if
            pymatgen could not parse the structure.
        """
        self.get_pmg_struct()
        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return

        sites = {}
        for site in pstruct.sites:
            sdat = site.as_dict()
            fcoords = sdat['abc']

            for spec in sdat['species']:
                elem = spec['element']
                elem = self._ELEMENT_ALIASES.get(elem, elem)
                if elem.startswith(('Dh', 'Dd', 'Dw')):
                    elem = 'H'
                sites.setdefault(elem, []).append((spec['occu'], fcoords))
        return sites

    def get_feffinp(self, absorber, edge=None, cluster_size=8.0, absorber_site=1,
                    with_h=False, version8=True):
        """Return the text of a Feff input file for this structure.

        Title lines naming the AMCSD id, mineral, publication and authors
        are added; all other arguments are passed on to cif2feffinp().
        """
        pub = self.publication
        journal = f"{pub.journalname} {pub.volume}, pp. {pub.page_first}-{pub.page_last} ({pub.year:d})"
        authors = ', '.join(pub.authors)
        titles = [f'Structure from AMCSD, AMS_ID: {self.ams_id:d}',
                  f'Mineral Name: {self.mineral.name:s}']

        if not self.formula_title.startswith('<missing'):
            titles.append(f'Formula Title: {self.formula_title}')

        titles.extend([f'Journal: {journal}', f'Authors: {authors}'])
        if not self.pub_title.startswith('<missing'):
            for i, line in enumerate(self.pub_title.split('\n')):
                titles.append(f'Title{i+1:d}: {line}')

        return cif2feffinp(self.ciftext, absorber, edge=edge,
                           cluster_size=cluster_size, with_h=with_h,
                           absorber_site=absorber_site,
                           extra_titles=titles, version8=version8)

    def save_feffinp(self, absorber, edge=None, cluster_size=8.0, absorber_site=1,
                     filename=None, version8=True):
        """Write a Feff input file for this structure and return its name.

        If `filename` is None the file is written as 'feff.inp' in a
        folder under `user_larchdir`/feff named from the absorber, the
        edge (if given), the mineral name (if known) and the AMCSD id.
        """
        feff_text = self.get_feffinp(absorber, edge=edge, cluster_size=cluster_size,
                                     absorber_site=absorber_site, version8=version8)
        if filename is None:
            name_parts = [absorber]
            if edge is not None:
                # edge is optional; leave it out of the folder name if unset
                name_parts.append(edge)
            min_name = self.mineral.name.lower()
            # compared after lower-casing, so the placeholder is 'none'
            if min_name not in ('', '<missing>', 'none'):
                name_parts.append(min_name)
            name_parts.append(f'CIF{self.ams_id:06d}')
            name = '_'.join(name_parts)

            ffolder = os.path.join(user_larchdir, 'feff', name)
            mkdir(ffolder)
            filename = os.path.join(ffolder, 'feff.inp')
        with open(filename, 'w', encoding=sys.getdefaultencoding()) as fh:
            fh.write(feff_text)
        return filename

646 

647class AMCSD(): 

648 """ 

649 Database of CIF structure data from the American Mineralogical Crystal Structure Database 

650 

651 http://rruff.geo.arizona.edu/AMS/amcsd.php 

652 

653 """ 

def __init__(self, dbname=None, read_only=False):
    """Connect to an existing AMCSD database.

    With no `dbname`, the file AMCSD_TRIM next to this module is used.
    Unless `read_only` is set, every column of CIF_TEXTCOLUMNS missing
    from the 'cif' table is added as a text column and recorded in the
    'version' table.

    Raises:
        IOError:    if the default database file does not exist
        ValueError: if isAMCSD() rejects the file
    """
    if dbname is None:
        dbname = os.path.join(os.path.dirname(__file__), AMCSD_TRIM)
        if not os.path.exists(dbname):
            raise IOError("Database '%s' not found!" % dbname)

    if not isAMCSD(dbname):
        raise ValueError("'%s' is not a valid AMCSD Database!" % dbname)

    self.connect(dbname, read_only=read_only)
    atexit.register(self.finalize_amcsd)
    # columns as reflected at connection time
    cif_columns = self.tables['cif'].columns
    if read_only:
        return

    for colname in CIF_TEXTCOLUMNS:
        if colname in cif_columns:
            continue
        self.session.execute(text(f'alter table cif add column {colname} text'))
        # reconnect so the reflected metadata is rebuilt after the change
        self.close()
        self.connect(dbname, read_only=read_only)
        time.sleep(0.1)
        self.insert('version', tag=f'with {colname}', date=isotime(),
                    notes=f'added {colname} column to cif table')

676 

def finalize_amcsd(self):
    """Close the database connection, if one was opened.

    Registered with atexit by __init__().
    """
    connection = getattr(self, 'conn', None)
    if connection is None:
        return
    connection.close()

681 

def connect(self, dbname, read_only=False):
    """Open `dbname`: create engine, connection and session, and reflect
    the table definitions into `self.tables`.

    With `read_only` set, the session's flush() is replaced by a
    function that does nothing.
    """
    self.dbname = dbname
    self.engine = make_engine(dbname)
    self.conn = self.engine.connect()
    session_factory = sessionmaker(bind=self.engine, autoflush=True,
                                   autocommit=False)
    self.session = session_factory()
    if read_only:
        def skip_flush(*args, **kwargs):
            return
        self.session.flush = skip_flush

    self.metadata = MetaData()
    self.metadata.reflect(bind=self.engine)
    self.tables = self.metadata.tables
    self.cif_elems = None

697 

def close(self):
    "flush pending changes, then close the session"
    session = self.session
    session.flush()
    session.close()

702 

def query(self, *args, **kws):
    "generic query: all arguments are passed to the session's query()"
    run_query = self.session.query
    return run_query(*args, **kws)

706 

def insert(self, tablename, **kws):
    """Insert one row, given as keyword arguments, and commit.

    `tablename` may be a Table object or the name of a reflected table.
    """
    table = tablename if isinstance(tablename, Table) else self.tables[tablename]
    self.session.execute(table.insert().values(kws))
    self.session.commit()
    self.session.flush()

716 

def update(self, tablename, whereclause=False, **kws):
    """Update the rows matching `whereclause` with the keyword values,
    and commit.

    `tablename` may be a Table object or the name of a reflected table.
    """
    table = tablename if isinstance(tablename, Table) else self.tables[tablename]
    statement = table.update().where(whereclause).values(kws)
    self.session.execute(statement)
    self.session.commit()
    self.session.flush()

727 

def execall(self, query):
    "execute a query and return all of its rows"
    result = self.session.execute(query)
    return result.fetchall()

730 

def execone(self, query):
    "execute a query and return its first row, or None if there is none (or it is empty)"
    row = self.session.execute(query).fetchone()
    if row is not None and len(row) >= 1:
        return row
    return None

736 

def get_all(self, tablename):
    "return all rows of the named table"
    table = self.tables[tablename]
    return self.execall(table.select())

739 

740 

def get_version(self, long=False, with_history=False):
    """
    return sqlite3 database and python library version numbers

    Parameters:
        long (bool): show timestamp and notes of latest version [False]
        with_history (bool): show complete version history [False]

    Returns:
        string: version information; the database version is reported
        as 'unknown' if the version table is empty
    """
    # NOTE(review): `__version__` is not defined or imported in the
    # visible part of this file (only sqlalchemy's is, as `sqla_version`)
    # -- confirm it is available at module level.
    rows = self.get_all('version') or []
    if not with_history:
        rows = rows[-1:]
    if long or with_history:
        lines = [f"AMCSD Version: {row.tag} [{row.date}] '{row.notes}'"
                 for row in rows]
        lines.append(f"Python Version: {__version__}")
        return "\n".join(lines)
    if not rows:
        # empty version table: nothing to index into
        return f"AMCSD Version: unknown, Python Version: {__version__}"
    return f"AMCSD Version: {rows[0].tag}, Python Version: {__version__}"

766 

def _get_tablerow(self, table, name, add=True):
    """Return the row of `table` whose `name` column equals `name`.

    If there is no such row it is inserted first when `add` is true;
    otherwise None is returned.

    The name is compared through the sqlalchemy expression
    `tab.c.name == name`, so it is used as given; the former
    replace('"', '\\"') call was removed because '\\"' in a normal string
    literal is just '"', making it a no-op.
    """
    tab = self.tables[table]
    by_name = tab.select().where(tab.c.name == name)
    rows = self.execall(by_name)
    if len(rows) == 0:
        if not add:
            return None
        self.insert(tab, name=name)
        rows = self.execall(by_name)
    return rows[0]

778 

def get_spacegroup(self, hm_name):
    """Look up a spacegroups row by its HM notation.

    Returns the first matching row, or None.  See add_spacegroup().
    """
    tab = self.tables['spacegroups']
    matches = self.execall(tab.select().where(tab.c.hm_notation==hm_name))
    if len(matches) == 0:
        return None
    return matches[0]

787 

788 

def add_spacegroup(self, hm_name, symmetry_xyz, category=None):
    """Add an entry to the spacegroups table and return its row.

    If a row with this HM notation and the same symmetry operations
    already exists it is returned unchanged.  Otherwise a new row is
    inserted and the result of get_spacegroup(hm_name) is returned.
    """
    existing = self.get_spacegroup(hm_name)
    if existing is not None and existing.symmetry_xyz == symmetry_xyz:
        return existing

    row = {'hm_notation': hm_name, 'symmetry_xyz': symmetry_xyz}
    if category is not None:
        row['category'] = category
    self.insert('spacegroups', **row)
    return self.get_spacegroup(hm_name)

801 

def get_publications(self, journalname=None, year=None, volume=None,
                     page_first=None, page_last=None, id=None):
    """Get rows from the publications table.

    Every argument is an optional filter; those left as None are
    ignored.  `journalname` is compared case-insensitively, `year` as an
    integer, and `volume`, `page_first`, `page_last` as strings.  With
    no filter at all, every publication is returned.

    Returns:
        list of CifPublication (each with a tuple of author names), or
        None if no publication matches.
    """
    tab = self.tables['publications']

    filters = []
    if journalname is not None:
        filters.append(func.lower(tab.c.journalname)==journalname.lower())
    if year is not None:
        filters.append(tab.c.year==int(year))
    if volume is not None:
        filters.append(tab.c.volume==str(volume))
    if page_first is not None:
        filters.append(tab.c.page_first==str(page_first))
    if page_last is not None:
        filters.append(tab.c.page_last==str(page_last))
    if id is not None:
        filters.append(tab.c.id==id)

    query = tab.select()
    if filters:
        # and_() without arguments is deprecated in sqlalchemy, so the
        # where clause is only added when there is something to filter on
        query = query.where(and_(*filters))

    rows = self.execall(query)
    if len(rows) == 0:
        return None

    authtab = self.tables['authors']
    patab = self.tables['publication_authors']
    out = []
    for row in rows:
        # author names linked to this publication
        q = select(authtab.c.name).where(and_(authtab.c.id==patab.c.author_id,
                                              patab.c.publication_id==row.id))
        authors = tuple(i[0] for i in self.execall(q))
        out.append(CifPublication(row.id, row.journalname, row.year,
                                  row.volume, row.page_first,
                                  row.page_last, authors))
    return out

837 

838 

def add_publication(self, journalname, year, authorlist, volume=None,
                    page_first=None, page_last=None, with_authors=True):
    """Insert a publication and return it as found by get_publications().

    Optional fields left as None are not written.  With `with_authors`
    set, each name in `authorlist` is looked up in (or added to) the
    authors table and linked to the publication.
    """
    row = dict(journalname=journalname, year=year)
    optional = (('volume', volume), ('page_first', page_first),
                ('page_last', page_last))
    for key, value in optional:
        if value is not None:
            row[key] = value

    self.insert('publications', **row)
    self.session.flush()
    matches = self.get_publications(journalname, year, volume=volume,
                                    page_first=page_first,
                                    page_last=page_last)
    pub = matches[0]

    if with_authors:
        for name in authorlist:
            author = self._get_tablerow('authors', name, add=True)
            self.insert('publication_authors',
                        publication_id=pub.id, author_id=author.id)
    return pub

862 

def add_cifdata(self, cif_id, mineral_id, publication_id,
                spacegroup_id, formula=None, compound=None,
                formula_title=None, pub_title=None, a=None, b=None,
                c=None, alpha=None, beta=None, gamma=None, url='',
                cell_volume=None, crystal_density=None,
                atoms_sites=None, atoms_x=None, atoms_y=None,
                atoms_z=None, atoms_occupancy=None, atoms_u_iso=None,
                atoms_aniso_label=None, atoms_aniso_u11=None,
                atoms_aniso_u22=None, atoms_aniso_u33=None,
                atoms_aniso_u12=None, atoms_aniso_u13=None,
                atoms_aniso_u23=None, with_elements=True):
    """Insert one row into the 'cif' table and return self.get_cif(cif_id).

    With `with_elements` set, a 'cif_elements' row is also inserted for
    each element that chemparse() finds in `formula`.
    """
    row = dict(id=cif_id, mineral_id=mineral_id,
               publication_id=publication_id,
               spacegroup_id=spacegroup_id,
               formula_title=formula_title, pub_title=pub_title,
               formula=formula, compound=compound, url=url, a=a, b=b,
               c=c, alpha=alpha, beta=beta, gamma=gamma,
               cell_volume=cell_volume,
               crystal_density=crystal_density,
               atoms_sites=atoms_sites, atoms_x=atoms_x,
               atoms_y=atoms_y, atoms_z=atoms_z,
               atoms_occupancy=atoms_occupancy,
               atoms_u_iso=atoms_u_iso,
               atoms_aniso_label=atoms_aniso_label,
               atoms_aniso_u11=atoms_aniso_u11,
               atoms_aniso_u22=atoms_aniso_u22,
               atoms_aniso_u33=atoms_aniso_u33,
               atoms_aniso_u12=atoms_aniso_u12,
               atoms_aniso_u13=atoms_aniso_u13,
               atoms_aniso_u23=atoms_aniso_u23)
    self.insert('cif', **row)

    if with_elements:
        elements = chemparse(formula).keys()
        for element in elements:
            self.insert('cif_elements', cif_id=cif_id, element=element)
    return self.get_cif(cif_id)

899 

900 

901 def add_ciffile(self, filename, cif_id=None, url='', debug=False): 

902 

903 if CifParser is None: 

904 raise ValueError("CifParser from pymatgen not available. Try 'pip install pymatgen'.") 

905 try: 

906 dat, formula, symm_xyz = parse_cif_file(filename) 

907 except: 

908 raise ValueError(f"unknown error trying to parse CIF file: {filename}") 

909 

910 # compound 

911 compound = '<missing>' 

912 for compname in ('_chemical_compound_source', 

913 '_chemical_name_systematic', 

914 '_chemical_name_common'): 

915 if compname in dat: 

916 compound = dat[compname] 

917 

918 

919 # spacegroup 

920 sgroup_name = dat.get('_symmetry_space_group_name_H-M', None) 

921 if sgroup_name is None: 

922 for key, val in dat.items(): 

923 if 'space_group' in key and 'H-M' in key: 

924 sgroup_name = val 

925 

926 sgroup = self.get_spacegroup(sgroup_name) 

927 if sgroup is not None and sgroup.symmetry_xyz != symm_xyz: 

928 for i in range(1, 11): 

929 tgroup_name = sgroup_name + f' %var{i:d}%' 

930 sgroup = self.get_spacegroup(tgroup_name) 

931 if sgroup is None or sgroup.symmetry_xyz == symm_xyz: 

932 sgroup_name = tgroup_name 

933 break 

934 if sgroup is None: 

935 sgroup = self.add_spacegroup(sgroup_name, symm_xyz) 

936 

937 min_name = '<missing>' 

938 for mname in ('_chemical_name_mineral', 

939 '_chemical_name_common'): 

940 if mname in dat: 

941 min_name = dat[mname] 

942 mineral = self._get_tablerow('minerals', min_name) 

943 

944 # get publication data (including ISCD style of 'citation' in place of 'journal' ) 

945 pubdict = dict(journalname=dat.get('_journal_name_full', None), 

946 year=dat.get('_journal_year', None), 

947 volume=dat.get('_journal_volume', None), 

948 page_first=dat.get('_journal_page_first', None), 

949 page_last=dat.get('_journal_page_last', None)) 

950 

951 for key, alt, dval in (('journalname', 'journal_full', 'No Journal'), 

952 ('year', None, -1), 

953 ('volume', 'journal_volume', 0), 

954 ('page_first', None, 0), 

955 ('page_last', None, 0)): 

956 if pubdict[key] is None: 

957 if alt is None: 

958 alt = key 

959 alt = '_citation_%s' % alt 

960 pubdict[key] = dat.get(alt, [dval])[0] 

961 authors = dat.get('_publ_author_name', None) 

962 if authors is None: 

963 authors = dat.get('_citation_author_name', ['Anonymous']) 

964 

965 pubs = self.get_publications(**pubdict) 

966 if pubs is None: 

967 pub = self.add_publication(pubdict['journalname'], 

968 pubdict['year'], authors, 

969 volume=pubdict['volume'], 

970 page_first=pubdict['page_first'], 

971 page_last=pubdict['page_last']) 

972 else: 

973 pub = pubs[0] 

974 

975 density = dat.get('_exptl_crystal_density_meas', None) 

976 if density is None: 

977 density = dat.get('_exptl_crystal_density_diffrn', -1.0) 

978 

979 if cif_id is None: 

980 cif_id = dat.get('_database_code_amcsd', None) 

981 if cif_id is None: 

982 cif_id = dat.get('_cod_database_code', None) 

983 if cif_id is None: 

984 cif_id = self.next_cif_id() 

985 cif_id = int(cif_id) 

986 

987 # check again for this cif id (must match CIF AMS id and formula 

988 tabcif = self.tables['cif'] 

989 this = self.execone(select(tabcif.c.id, tabcif.c.formula 

990 ).where(tabcif.c.id==int(cif_id))) 

991 if this is not None: 

992 _cid, _formula = this 

993 if formula.replace(' ', '') == _formula.replace(' ', ''): 

994 return cif_id 

995 else: 

996 cif_id = self.next_cif_id() 

997 

998 if debug: 

999 print("##CIF Would add Cif Data !" ) 

1000 print(cif_id, mineral.id, pub.id, sgroup.id) 

1001 print("##CIF formuala / compound: ", formula, compound) 

1002 print("titles: ", 

1003 dat.get('_amcsd_formula_title', '<missing>'), 

1004 dat.get('_publ_section_title', '<missing>')) 

1005 print("##CIF atom sites :", json.dumps(dat['_atom_site_label'])) 

1006 print("##CIF locations : ", 

1007 put_optarray(dat, '_atom_site_fract_x'), 

1008 put_optarray(dat, '_atom_site_fract_y'), 

1009 put_optarray(dat, '_atom_site_fract_z'), 

1010 put_optarray(dat, '_atom_site_occupancy'), 

1011 put_optarray(dat, '_atom_site_U_iso_or_equiv')) 

1012 print("##CIF aniso label : ", 

1013 json.dumps(dat.get('_atom_site_aniso_label', '<missing>'))) 

1014 print("##CIF aniso : ", 

1015 put_optarray(dat, '_atom_site_aniso_U_11'), 

1016 put_optarray(dat, '_atom_site_aniso_U_22'), 

1017 put_optarray(dat, '_atom_site_aniso_U_33'), 

1018 put_optarray(dat, '_atom_site_aniso_U_12'), 

1019 put_optarray(dat, '_atom_site_aniso_U_13'), 

1020 put_optarray(dat, '_atom_site_aniso_U_23')) 

1021 print('##CIF cell data: ', dat['_cell_length_a'], 

1022 dat['_cell_length_b'], 

1023 dat['_cell_length_c'], 

1024 dat['_cell_angle_alpha'], 

1025 dat['_cell_angle_beta'], 

1026 dat['_cell_angle_gamma']) 

1027 print("##CIF volume/ density ", dat.get('_cell_volume', -1), density) 

1028 print("##CIF url : ", type(url), url) 

1029 

1030 self.add_cifdata(cif_id, mineral.id, pub.id, sgroup.id, 

1031 formula=formula, compound=compound, 

1032 formula_title=dat.get('_amcsd_formula_title', '<missing>'), 

1033 pub_title=dat.get('_publ_section_title', '<missing>'), 

1034 atoms_sites=json.dumps(dat['_atom_site_label']), 

1035 atoms_x=put_optarray(dat, '_atom_site_fract_x'), 

1036 atoms_y=put_optarray(dat, '_atom_site_fract_y'), 

1037 atoms_z=put_optarray(dat, '_atom_site_fract_z'), 

1038 atoms_occupancy=put_optarray(dat, '_atom_site_occupancy'), 

1039 atoms_u_iso=put_optarray(dat, '_atom_site_U_iso_or_equiv'), 

1040 atoms_aniso_label=json.dumps(dat.get('_atom_site_aniso_label', '<missing>')), 

1041 atoms_aniso_u11=put_optarray(dat, '_atom_site_aniso_U_11'), 

1042 atoms_aniso_u22=put_optarray(dat, '_atom_site_aniso_U_22'), 

1043 atoms_aniso_u33=put_optarray(dat, '_atom_site_aniso_U_33'), 

1044 atoms_aniso_u12=put_optarray(dat, '_atom_site_aniso_U_12'), 

1045 atoms_aniso_u13=put_optarray(dat, '_atom_site_aniso_U_13'), 

1046 atoms_aniso_u23=put_optarray(dat, '_atom_site_aniso_U_23'), 

1047 a=dat['_cell_length_a'], 

1048 b=dat['_cell_length_b'], 

1049 c=dat['_cell_length_c'], 

1050 alpha=dat['_cell_angle_alpha'], 

1051 beta=dat['_cell_angle_beta'], 

1052 gamma=dat['_cell_angle_gamma'], 

1053 cell_volume=dat.get('_cell_volume', -1), 

1054 crystal_density=density, 

1055 url=url) 

1056 return cif_id 

1057 

1058 def get_cif(self, cif_id, as_strings=False): 

1059 """get Cif Structure object """ 

1060 tab = self.tables['cif'] 

1061 

1062 cif = self.execone(tab.select().where(tab.c.id==cif_id)) 

1063 if cif is None: 

1064 return 

1065 

1066 tab_pub = self.tables['publications'] 

1067 tab_auth = self.tables['authors'] 

1068 tab_pa = self.tables['publication_authors'] 

1069 tab_min = self.tables['minerals'] 

1070 tab_sp = self.tables['spacegroups'] 

1071 mineral = self.execone(tab_min.select().where(tab_min.c.id==cif.mineral_id)) 

1072 sgroup = self.execone(tab_sp.select().where(tab_sp.c.id==cif.spacegroup_id)) 

1073 hm_symbol = sgroup.hm_notation 

1074 if '%var' in hm_symbol: 

1075 hm_symbol = hm_symbol.split('%var')[0] 

1076 

1077 pub = self.get_publications(id=cif.publication_id)[0] 

1078 

1079 out = CifStructure(ams_id=cif_id, publication=pub, 

1080 mineral=mineral, spacegroup=sgroup, 

1081 hm_symbol=hm_symbol, ams_db=self) 

1082 

1083 for attr in ('formula_title', 'compound', 'formula', 'pub_title'): 

1084 setattr(out, attr, getattr(cif, attr, '<missing>')) 

1085 for attr in ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 

1086 'cell_volume', 'crystal_density'): 

1087 val = getattr(cif, attr, '-1') 

1088 if not as_strings: 

1089 if val is not None: 

1090 if '(' in val: 

1091 val = val.split('(')[0] 

1092 if ',' in val and '.' not in val: 

1093 val = val.replace(',', '.') 

1094 try: 

1095 val = float(val) 

1096 except: 

1097 pass 

1098 setattr(out, attr, val) 

1099 

1100 for attr in ('atoms_sites', 'atoms_aniso_label'): 

1101 val = getattr(cif, attr, '<missing>') 

1102 val = '<missing>' if val in (None, '<missing>') else json.loads(val) 

1103 setattr(out, attr, val) 

1104 

1105 if out.atoms_sites not in (None, '<missing>'): 

1106 out.natoms = len(out.atoms_sites) 

1107 for attr in ('atoms_x', 'atoms_y', 'atoms_z', 'atoms_occupancy', 

1108 'atoms_u_iso', 'atoms_aniso_u11', 'atoms_aniso_u22', 

1109 'atoms_aniso_u33', 'atoms_aniso_u12', 

1110 'atoms_aniso_u13', 'atoms_aniso_u23'): 

1111 try: 

1112 val = get_optarray(getattr(cif, attr)) 

1113 if val == '0': 

1114 val = None 

1115 elif not as_strings: 

1116 tmp = [] 

1117 for i in range(len(val)): 

1118 v = val[i] 

1119 if v in ('?', '.'): 

1120 v = 2. 

1121 else: 

1122 v = float(v) 

1123 tmp.append(v) 

1124 val = tmp 

1125 setattr(out, attr, val) 

1126 except: 

1127 print(f"could not parse CIF entry for {cif_id} '{attr}': {val} ") 

1128 

1129 # we're now ignoring per-cif qvalues 

1130 # out.qval = None 

1131 # if cif.qdat is not None: 

1132 # out.qval = np.unpackbits(np.array([int(b) for b in b64decode(cif.qdat)], 

1133 # dtype='uint8')) 

1134 

1135 out.hkls = None 

1136 if hasattr(cif, 'hkls'): 

1137 out.hkls = cif.hkls 

1138 

1139 return out 

1140 

1141 def next_cif_id(self): 

1142 """next available CIF ID > 200000 that is not in current table""" 

1143 max_id = 200_000 

1144 tabcif = self.tables['cif'] 

1145 for row in self.execall(select(tabcif.c.id).where(tabcif.c.id>200000)): 

1146 if row[0] > max_id: 

1147 max_id = row[0] 

1148 return max_id + 1 

1149 

1150 

1151 def all_minerals(self): 

1152 names = [] 

1153 for row in self.get_all('minerals'): 

1154 if row.name not in names: 

1155 names.append(row.name) 

1156 return names 

1157 

1158 def all_authors(self): 

1159 names = [] 

1160 for row in self.get_all('authors'): 

1161 if row.name not in names: 

1162 names.append(row.name) 

1163 return names 

1164 

1165 def all_journals(self): 

1166 names = [] 

1167 for row in self.get_all('publications'): 

1168 if row.journalname not in names: 

1169 names.append(row.journalname) 

1170 return names 

1171 

1172 def get_cif_elems(self): 

1173 if self.cif_elems is None: 

1174 out = {} 

1175 for row in self.get_all('cif_elements'): 

1176 cifid = int(row.cif_id) 

1177 if cifid not in out: 

1178 out[cifid] = [] 

1179 if row.element not in out[cifid]: 

1180 out[cifid].append(row.element) 

1181 

1182 self.cif_elems = out 

1183 return self.cif_elems 

1184 

1185 

1186 def find_cifs(self, id=None, mineral_name=None, author_name=None, 

1187 journal_name=None, contains_elements=None, 

1188 excludes_elements=None, strict_contains=False, 

1189 full_occupancy=False, max_matches=1000): 

1190 """return list of CIF Structures matching mineral, publication, or elements 

1191 """ 

1192 if id is not None: 

1193 thiscif = self.get_cif(id) 

1194 if thiscif is not None: 

1195 return [thiscif] 

1196 

1197 tabcif = self.tables['cif'] 

1198 tabmin = self.tables['minerals'] 

1199 tabpub = self.tables['publications'] 

1200 tabaut = self.tables['authors'] 

1201 tab_ap = self.tables['publication_authors'] 

1202 tab_ce = self.tables['cif_elements'] 

1203 

1204 matches = [] 

1205 t0 = time.time() 

1206 if mineral_name is None: 

1207 mineral_name = '' 

1208 mineral_name = mineral_name.strip() 

1209 

1210 if mineral_name not in (None, '') and ('*' in mineral_name or 

1211 '^' in mineral_name or 

1212 '$' in mineral_name): 

1213 pattern = mineral_name.replace('*', '.*').replace('..*', '.*') 

1214 matches = [] 

1215 for row in self.get_all('minerals'): 

1216 if re.search(pattern, row.name, flags=re.IGNORECASE) is not None: 

1217 query = select(tabcif.c.id).where(tabcif.c.mineral_id==row.id) 

1218 for m in [row[0] for row in self.execall(query)]: 

1219 if m not in matches: 

1220 matches.append(m) 

1221 

1222 if journal_name not in (None, ''): 

1223 pattern = journal_name.replace('*', '.*').replace('..*', '.*') 

1224 new_matches = [] 

1225 for c in matches: 

1226 pub_id = self.execone(select(tabcif.c.publication_id 

1227 ).where(tabcif.c.id==c)) 

1228 this_journal = self.execone(select(tabpub.c.journalname 

1229 ).where(tabpub.c.id==pub_id)) 

1230 if re.search(pattern, this_journal, flags=re.IGNORECASE) is not None: 

1231 new_matches.append[c] 

1232 matches = new_matches 

1233 

1234 

1235 else: # strict mineral name or no mineral name 

1236 args = [] 

1237 if mineral_name not in (None, ''): 

1238 args.append(func.lower(tabmin.c.name)==mineral_name.lower()) 

1239 args.append(tabmin.c.id==tabcif.c.mineral_id) 

1240 

1241 if journal_name not in (None, ''): 

1242 args.append(func.lower(tabpub.c.journalname)==journal_name.lower()) 

1243 args.append(tabpub.c.id==tabcif.c.publication_id) 

1244 

1245 if author_name not in (None, ''): 

1246 args.append(func.lower(tabaut.c.name)==author_name.lower()) 

1247 args.append(tabcif.c.publication_id==tab_ap.c.publication_id) 

1248 args.append(tabaut.c.id==tab_ap.c.author_id) 

1249 

1250 query = select(tabcif.c.id) 

1251 if len(args) > 0: 

1252 query = select(tabcif.c.id).where(and_(*args)) 

1253 matches = [row[0] for row in self.execall(query)] 

1254 matches = list(set(matches)) 

1255 # 

1256 cif_elems = self.get_cif_elems() 

1257 if contains_elements is not None: 

1258 for el in contains_elements: 

1259 new_matches = [] 

1260 for row in matches: 

1261 if row in cif_elems and el in cif_elems[row]: 

1262 new_matches.append(row) 

1263 matches = new_matches 

1264 

1265 if strict_contains: 

1266 excludes_elements = ATOM_SYMS[:] 

1267 for c in contains_elements: 

1268 if c in excludes_elements: 

1269 excludes_elements.remove(c) 

1270 if excludes_elements is not None: 

1271 bad = [] 

1272 for el in excludes_elements: 

1273 for row in matches: 

1274 if el in cif_elems[row] and row not in bad: 

1275 bad.append(row) 

1276 for row in bad: 

1277 matches.remove(row) 

1278 

1279 

1280 if full_occupancy: 

1281 good = [] 

1282 for cif_id in matches: 

1283 cif = self.execone(tabcif.select().where(tabcif.c.id==cif_id)) 

1284 occ = get_optarray(getattr(cif, 'atoms_occupancy')) 

1285 if occ in ('0', 0, None): 

1286 good.append(cif_id) 

1287 else: 

1288 try: 

1289 min_wt = min([float(x) for x in occ]) 

1290 except: 

1291 min_wt = 0 

1292 if min_wt > 0.96: 

1293 good.append(cif_id) 

1294 matches = good 

1295 

1296 if len(matches) > max_matches: 

1297 matches = matches[:max_matches] 

1298 return [self.get_cif(cid) for cid in matches] 

1299 

1300 def set_hkls(self, cifid, hkls, degens): 

1301 ctab = self.tables['cif'] 

1302 packed_hkls = pack_hkl_degen(hkls, degens) 

1303 self.update(ctab, whereclause=(ctab.c.id == cifid), hkls=packed_hkls) 

1304 return packed_hkls 

1305 

def get_amcsd(download_full=True, timeout=30):
    """return instance of the AMCSD CIF Database

    The instance is created once and cached in the module-level `_CIFDB`.
    If the full database file is not yet in the user's larch folder and
    `download_full` is True, each of `SOURCE_URLS` is tried in turn until
    one returns the file, which is then saved and opened.

    Arguments:
        download_full: whether to download the full database when it is
                       not found locally.
        timeout:       timeout in seconds passed to each download request.

    Returns:
        AMCSD database.  If the full database is neither present nor
        downloaded, a default `AMCSD()` is returned (and not cached).
    """
    global _CIFDB
    if _CIFDB is not None:
        return _CIFDB

    dbfull = os.path.join(user_larchdir, AMCSD_FULL)
    if os.path.exists(dbfull):
        _CIFDB = AMCSD(dbfull)
        return _CIFDB

    t0 = time.time()
    if download_full:
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
        req = None   # stays None if there are no sources or all of them fail
        for src in SOURCE_URLS:
            url = f"{src:s}/{AMCSD_FULL:s}"
            try:
                req = requests.get(url, verify=True, timeout=timeout)
            except requests.exceptions.RequestException:
                # connection error or timeout: try the next source
                req = None
                continue
            if req.status_code == 200:
                break
        if req is not None and req.status_code == 200:
            with open(dbfull, 'wb') as fh:
                fh.write(req.content)
            print("Downloaded  %s : %.2f sec" % (dbfull, time.time()-t0))
            time.sleep(0.25)
            _CIFDB = AMCSD(dbfull)
            return _CIFDB
    # finally download of full must have failed
    return AMCSD()

1339 

def get_cif(ams_id):
    """
    Look up a CIF Structure by its AMS ID in the database
    returned by get_amcsd().
    """
    return get_amcsd().get_cif(ams_id)

1346 

def find_cifs(mineral_name=None, journal_name=None, author_name=None,
              contains_elements=None, excludes_elements=None,
              strict_contains=False, full_occupancy=False):
    """
    Search the database returned by get_amcsd() and return the list of
    CIF Structures matching all of the given criteria:

        mineral_name:      case-insensitive match of mineral name
        journal_name:      journal name to match
        author_name:       author name to match
        contains_elements: list of atomic symbols required to be in structure
        excludes_elements: list of atomic symbols required to NOT be in structure
        strict_contains:   `contains_elements` is complete -- no other elements
        full_occupancy:    passed on to the database's find_cifs()
    """
    criteria = dict(mineral_name=mineral_name,
                    journal_name=journal_name,
                    author_name=author_name,
                    contains_elements=contains_elements,
                    excludes_elements=excludes_elements,
                    strict_contains=strict_contains,
                    full_occupancy=full_occupancy)
    return get_amcsd().find_cifs(**criteria)