Coverage for /Users/Newville/Codes/xraylarch/larch/xrd/amcsd.py: 10%
864 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
1#!/usr/bin/env python
2"""
3AMCIFDB: American Mineralogical CIF database as sqlite3 database/python
5Usage:
6 amcifdb = AMCIFDB('amcif.db')
8add a CIF file:
9 amcifdb.add_ciffile('NewFile.cif')
11generate the text of a CIF file from index:
12 cif_text = amcifdb.get_ciftext(300)
14OK, that looks like 'well, why not just save the CIF files'?
16And the answers are that there are simple methods for:
17 a) getting the XRD Q points
18 b) getting structure factors
19 c) getting atomic clusters as for feff files
20 d) saving Feff.inp files
23"""
25import sys
26import os
27import re
28import time
29import json
30from io import StringIO
31from string import ascii_letters
32from base64 import b64encode, b64decode
33from collections import namedtuple
34from gzip import GzipFile
35import requests
36from requests.packages.urllib3.exceptions import InsecureRequestWarning
37import atexit
38import numpy as np
40from sqlalchemy import MetaData, create_engine, func, text, and_, Table
41from sqlalchemy import __version__ as sqla_version
42from sqlalchemy.sql import select as sqla_select
43from sqlalchemy.orm import sessionmaker
46from .amcsd_utils import (make_engine, isAMCSD, put_optarray, get_optarray,
47 PMG_CIF_OPTS, CifParser, SpacegroupAnalyzer)
49from xraydb.chemparser import chemparse
50from xraydb import f0, f1_chantler, f2_chantler
53from .xrd_tools import generate_hkl, d_from_hkl, twth_from_q, E_from_lambda
54from .cif2feff import cif2feffinp
55from ..utils import isotime, mkdir
56from ..utils.strutils import version_ge, bytes2str
57from ..utils.physical_constants import TAU, ATOM_SYMS
58from ..site_config import user_larchdir
59from .. import logger
# module-level placeholders; ALL_HKLS is filled lazily by
# CifStructure.find_hkls() the first time HKL values are needed.
# NOTE(review): _CIFDB looks like a cached database instance -- confirm
_CIFDB = None
ALL_HKLS = None

# file names of the AMCSD sqlite databases; AMCSD_TRIM is the default
# used by AMCSD() when no database name is given.
AMCSD_TRIM = 'amcsd_cif1.db'
AMCSD_FULL = 'amcsd_cif2.db'

# NOTE(review): presumably the download locations for the databases -- confirm
SOURCE_URLS = (
    'https://docs.xrayabsorption.org/databases/',
    'https://millenia.cars.aps.anl.gov/xraylarch/downloads/',
)

# text columns expected in the 'cif' table; AMCSD() adds any that are
# missing when the database is not opened read-only.
CIF_TEXTCOLUMNS = (
    'formula',
    'compound',
    'pub_title',
    'formula_title',
    'a',
    'b',
    'c',
    'alpha',
    'beta',
    'gamma',
    'cell_volume',
    'crystal_density',
    'atoms_sites',
    'atoms_x',
    'atoms_y',
    'atoms_z',
    'atoms_occupancy',
    'atoms_u_iso',
    'atoms_aniso_label',
    'atoms_aniso_u11',
    'atoms_aniso_u22',
    'atoms_aniso_u33',
    'atoms_aniso_u12',
    'atoms_aniso_u13',
    'atoms_aniso_u23',
    'qdat',
    'url',
    'hkls',
)

# publication record as returned by AMCSD.get_publications()
CifPublication = namedtuple(
    'CifPublication',
    'id journalname year volume page_first page_last authors')

# result record of CifStructure.get_structure_factors()
StructureFactor = namedtuple(
    'StructureFactor',
    'q intensity hkl twotheta d wavelength energy f2hkl degen lorentz')
# alphabet of 64 printable characters: each character carries 6 bits, so
# two characters hold the 12 bits of an (H, K, L) triple with 4 bits each
HKL_ENCODE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_%'


def pack_hkl(h, k, l):
    """encode H, K, L as a 2-character printable string

    Each of H, K, L must be an unsigned integer from 0 to 15; the three
    values are combined into a single number (0 to 4095) that is written
    as two base-64 digits taken from HKL_ENCODE.

    Raises ValueError if any value is outside of 0..15.

    unpack_hkl() reverses the process.
    """
    if any(val > 15 or val < 0 for val in (h, k, l)):
        raise ValueError("hkl values out of range (max=15)")
    high, low = divmod((h*16 + k)*16 + l, 64)
    return HKL_ENCODE[high] + HKL_ENCODE[low]


def unpack_hkl(hash):
    """decode the 2-character string written by pack_hkl() to (H, K, L)"""
    high = HKL_ENCODE.index(hash[0])
    low = HKL_ENCODE.index(hash[1])
    h, rest = divmod(64*high + low, 256)
    k, l = divmod(rest, 16)
    return h, k, l
def pack_hkl_degen(hkls, degen):
    """pack array of H, K, L and degeneracy values into printable
    string for storage and transmission

    Arguments:
        hkls: array or list of (H, K, L) lists/tuples, with each of
              H, K, L an unsigned integer from 0 to 15
        degen: ndarray or list of degeneracies, same length as hkls

    Returns:
        string of the form '<2 characters per hkl>|<json list of degen>'

    Raises:
        ValueError if the lengths differ or an H, K, L value is out of range.

    see also unpack_hkl_degen() to reverse the process.
    """
    if len(hkls) != len(degen):
        raise ValueError("hkls and degen must be the same length in pack_hkl_degen()")

    shkl = ''.join(pack_hkl(h, k, l) for h, k, l in hkls)
    # np.asarray() so that a plain list (which has no .tolist()) is accepted
    # as well as an ndarray; .tolist() yields json-serializable Python numbers
    sdegen = json.dumps(np.asarray(degen).tolist()).replace(' ', '')
    return f"{shkl}|{sdegen}"
def unpack_hkl_degen(sinp):
    """unpack arrays of h, k, l and degeneracies from the string
    written by pack_hkl_degen()

    The text before '|' holds 2 characters per (H, K, L) triple, the
    text after it is a json list of degeneracies.

    returns (hkls, degen) as two numpy arrays.
    """
    packed, degen_json = sinp.split('|')
    npairs = len(packed)//2
    triples = [unpack_hkl(packed[pos:pos+2]) for pos in range(0, 2*npairs, 2)]
    degen_values = json.loads(degen_json)
    return np.array(triples), np.array(degen_values)
def select(*args):
    """wrap sqlalchemy select for version 1.3 and 2.0

    sqlalchemy 1.4 and later get the columns as positional arguments,
    earlier versions get them as a single tuple.
    """
    if not version_ge(sqla_version, '1.4.0'):
        return sqla_select(tuple(args))
    return sqla_select(*args)
def get_nonzero(thing):
    """return None for a length-1 sequence holding (nearly) zero,
    otherwise return the input unchanged.

    A sequence with exactly one element whose absolute value is below
    1.e-5 is treated as 'no data'.  Anything that cannot be tested this
    way (no len(), not indexable, non-numeric element) is returned as is.
    """
    try:
        if len(thing) == 1 and abs(thing[0]) < 1.e-5:
            return None
    except Exception:
        # deliberate best-effort: the input is simply passed through.
        # 'Exception' rather than a bare except so that KeyboardInterrupt
        # and SystemExit are not swallowed here.
        pass
    return thing
def clean_elemsym(sym):
    """return the ASCII letters found in the first two characters of sym,
    so that 'Fe2+' gives 'Fe' and 'O1' gives 'O'.
    """
    leading = (sym + ' ')[:2]
    return ''.join(char for char in leading if char in ascii_letters)
def parse_cif_file(filename):
    """parse ciffile, extract data for 1st listed structure,
    and do some basic checks:
        must have formula
        must have symmetry operations

    The formula is taken from '_chemical_formula_sum' or
    '_chemical_formula_moiety' (the last one that chemparse() accepts
    wins); if neither is usable it is built from the
    '_atom_site_type_symbol' entries.

    Note that the space group name is not checked here.

    Arguments:
        filename: CIF file, passed on to pymatgen's CifParser

    Returns:
        dat, formula, json-dumped symm_xyz

    Raises:
        ValueError if pymatgen is not available, or if no formula or no
        symmetry operations can be read.
    """
    if CifParser is None:
        raise ValueError("CifParser from pymatgen not available. Try 'pip install pymatgen'.")

    cif = CifParser(filename, **PMG_CIF_OPTS)
    cifkey = list(cif._cif.data.keys())[0]
    dat = cif._cif.data[cifkey].data

    formula = None
    for formname in ('_chemical_formula_sum', '_chemical_formula_moiety'):
        if formname not in dat:
            continue
        try:
            # only used as a validity check: the raw text is what is kept
            chemparse(dat[formname])
        except Exception:
            continue
        formula = dat[formname]

    if formula is None and '_atom_site_type_symbol' in dat:
        # count each symbol in a single pass, in order of first appearance
        counts = {}
        for sym in dat['_atom_site_type_symbol']:
            counts[sym] = counts.get(sym, 0) + 1
        formula = ''.join(sym if num == 1 else '%s%d' % (sym, num)
                          for sym, num in counts.items())

    if formula is None:
        # no ':s' format spec, so that path-like filenames can be reported too
        raise ValueError(f'Cannot read chemical formula from file {filename}')

    # get symmetry operations
    symm_xyz = dat.get('_space_group_symop_operation_xyz', None)
    if symm_xyz is None:
        symm_xyz = dat.get('_symmetry_equiv_pos_as_xyz', None)
    if symm_xyz is None:
        raise ValueError(f'Cannot read symmetries from file {filename}')

    symm_xyz = json.dumps(symm_xyz)
    return dat, formula, symm_xyz
class CifStructure():
    """representation of a Cif Structure

    Holds the values needed to write CIF text for one structure (cell,
    atomic sites, publication, mineral, spacegroup) and uses pymatgen to
    turn that text into a structure for calculating structure factors
    and for writing Feff input files.

    `publication`, `mineral` and `spacegroup` are objects with the
    attributes read below (publication.authors, mineral.name,
    spacegroup.symmetry_xyz, ...); `ams_db`, if given, must provide
    set_hkls(ams_id, hkls, degen).
    """

    def __init__(self, ams_id=None, ams_db=None, publication=None, mineral=None,
                 spacegroup=None, hm_symbol=None, formula_title=None,
                 compound=None, formula=None, pub_title=None, a=None, b=None,
                 c=None, alpha=None, beta=None, gamma=None, hkls=None,
                 cell_volume=None, crystal_density=None,
                 atoms_sites='<missing>', atoms_aniso_label='<missing>',
                 atoms_x=None, atoms_y=None, atoms_z=None,
                 atoms_occupancy=None, atoms_u_iso=None, atoms_aniso_u11=None,
                 atoms_aniso_u22=None, atoms_aniso_u33=None,
                 atoms_aniso_u12=None, atoms_aniso_u13=None,
                 atoms_aniso_u23=None):

        self.ams_id = ams_id
        self.ams_db = ams_db
        self.publication = publication
        self.mineral = mineral
        self.spacegroup = spacegroup
        self.hm_symbol = hm_symbol
        self.formula_title = formula_title
        self.compound = compound
        self.formula = formula
        self.pub_title = pub_title
        self.a = a
        self.b = b
        self.c = c
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.hkls = hkls
        self.cell_volume = cell_volume
        self.crystal_density = crystal_density
        self.atoms_sites = atoms_sites
        self.atoms_aniso_label = atoms_aniso_label
        self.atoms_x = atoms_x
        self.atoms_y = atoms_y
        self.atoms_z = atoms_z
        # optional per-atom data: a single (nearly) zero value means 'not set'
        self.atoms_occupancy = get_nonzero(atoms_occupancy)
        self.atoms_u_iso = get_nonzero(atoms_u_iso)
        self.atoms_aniso_u11 = get_nonzero(atoms_aniso_u11)
        self.atoms_aniso_u22 = get_nonzero(atoms_aniso_u22)
        self.atoms_aniso_u33 = get_nonzero(atoms_aniso_u33)
        self.atoms_aniso_u12 = get_nonzero(atoms_aniso_u12)
        self.atoms_aniso_u13 = get_nonzero(atoms_aniso_u13)
        self.atoms_aniso_u23 = get_nonzero(atoms_aniso_u23)
        self.natoms = 0
        self._ciftext = None       # cache for the ciftext property
        self.pmg_pstruct = None    # pymatgen conventional standard structure
        self.pmg_cstruct = None    # pymatgen structure as parsed from CIF text
        if atoms_sites not in (None, '<missing>'):
            self.natoms = len(atoms_sites)

    def __repr__(self):
        if self.ams_id is None or self.formula is None:
            return '<CifStructure empty>'
        return f'<CifStructure, ams_id={self.ams_id:d}, formula={self.formula:s}>'

    def get_mineralname(self):
        "mineral name, falling back to the formula title, then to 'missing'"
        minname = self.mineral.name
        if minname == '<missing>':
            minname = self.formula_title
        if minname == '<missing>':
            minname = 'missing'
        return minname

    @property
    def ciftext(self):
        "text of a CIF file for this structure, built once and then cached"
        if self._ciftext is not None:
            return self._ciftext

        out = ['data_global']
        if self.formula_title != '<missing>':
            out.append(f"_amcsd_formula_title '{self.formula_title:s}'")

        if self.mineral.name != '<missing>':
            out.append(f"_chemical_name_mineral '{self.mineral.name:s}'")
        out.append('loop_')
        out.append('_publ_author_name')
        for a in self.publication.authors:
            out.append(f"'{a:s}'")

        out.append(f"_journal_name_full '{self.publication.journalname}'")
        out.append(f"_journal_volume {self.publication.volume}")
        out.append(f"_journal_year {self.publication.year}")
        out.append(f"_journal_page_first {self.publication.page_first}")
        out.append(f"_journal_page_last {self.publication.page_last}")
        out.append('_publ_section_title')
        out.append(';')
        out.append(f"{self.pub_title:s}")
        out.append(';')
        out.append(f"_database_code_amcsd {self.ams_id:07d}")
        if self.compound != '<missing>':
            out.append(f"_chemical_compound_source '{self.compound}'")
        out.append(f"_chemical_formula_sum '{self.formula}'")
        out.append(f"_cell_length_a {self.a}")
        out.append(f"_cell_length_b {self.b}")
        out.append(f"_cell_length_c {self.c}")
        out.append(f"_cell_angle_alpha {self.alpha}")
        out.append(f"_cell_angle_beta {self.beta}")
        out.append(f"_cell_angle_gamma {self.gamma}")
        out.append(f"_cell_volume {self.cell_volume}")
        out.append(f"_exptl_crystal_density_diffrn {self.crystal_density}")
        out.append(f"_symmetry_space_group_name_H-M '{self.hm_symbol}'")
        out.append('loop_')
        out.append('_space_group_symop_operation_xyz')
        for xyzop in json.loads(self.spacegroup.symmetry_xyz):
            out.append(f" '{xyzop:s}'")

        atoms_sites = self.atoms_sites
        if atoms_sites not in (None, 'None', '0', '<missing>'):
            out.append('loop_')
            out.append('_atom_site_label')
            out.append('_atom_site_fract_x')
            out.append('_atom_site_fract_y')
            out.append('_atom_site_fract_z')

            natoms = len(atoms_sites)
            atoms_x = self.atoms_x
            atoms_y = self.atoms_y
            atoms_z = self.atoms_z
            atoms_occ = self.atoms_occupancy
            atoms_u_iso = self.atoms_u_iso
            # occupancy and U_iso columns are written only when available
            if atoms_occ is not None:
                out.append('_atom_site_occupancy')
            if atoms_u_iso is not None:
                out.append('_atom_site_U_iso_or_equiv')
            for i in range(natoms):
                adat = f"{atoms_sites[i]} {atoms_x[i]} {atoms_y[i]} {atoms_z[i]}"
                if atoms_occ is not None:
                    adat += f" {atoms_occ[i]}"
                if atoms_u_iso is not None:
                    adat += f" {atoms_u_iso[i]}"
                out.append(adat)

            aniso_label = self.atoms_aniso_label
            if aniso_label not in (None, '0', '<missing>'):
                out.append('loop_')
                out.append('_atom_site_aniso_label')
                out.append('_atom_site_aniso_U_11')
                out.append('_atom_site_aniso_U_22')
                out.append('_atom_site_aniso_U_33')
                out.append('_atom_site_aniso_U_12')
                out.append('_atom_site_aniso_U_13')
                out.append('_atom_site_aniso_U_23')
                natoms = len(aniso_label)
                u11 = self.atoms_aniso_u11
                u22 = self.atoms_aniso_u22
                u33 = self.atoms_aniso_u33
                u12 = self.atoms_aniso_u12
                u13 = self.atoms_aniso_u13
                u23 = self.atoms_aniso_u23

                for i in range(natoms):
                    out.append(f"{aniso_label[i]} {u11[i]} {u22[i]} {u33[i]} {u12[i]} {u13[i]} {u23[i]}")

        out.append('')
        out.append('')
        self._ciftext = '\n'.join(out)
        # return the cached text directly rather than re-entering the property
        return self._ciftext

    def find_hkls(self, nmax=64, qmax=10, wavelength=0.75):
        """find the HKLs and degeneracies of the strongest reflections

        this will calculate structure factors, and sort them, but the
        purpose is really to do a filter to find the strongest HKLs that
        can then be saved and restored for structure factor calcs using
        only the most important HKL values.

        Arguments:
            nmax: maximum number of reflections to return
            qmax: only reflections with q below this value are considered
            wavelength: used for the Lorentz and polarization correction

        If `ams_db` is set, the result is also stored with
        ams_db.set_hkls() and the value it returns is kept in self.hkls.

        returns hkls, degen of the nmax reflections with the highest
        scattered intensity, or None if pymatgen could not parse the
        structure.
        """
        self.get_pmg_struct()

        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return

        global ALL_HKLS
        if ALL_HKLS is None:
            ALL_HKLS = generate_hkl(hmax=15, kmax=15, lmax=15, positive_only=False)

        hkls = ALL_HKLS[:]
        unitcell = self.get_unitcell()
        qhkls = TAU / d_from_hkl(hkls, **unitcell)

        # remove q values outside of range
        qfilt = (qhkls < qmax)
        qhkls = qhkls[qfilt]
        hkls = hkls[qfilt]

        # find duplicate q-values, set degen
        # scale up q values to integers to better find duplicates
        qscaled = [int(round(q*1.e9)) for q in qhkls]
        q_unique, q_degen, hkl_unique = [], [], []
        q_position = {}   # scaled q -> index in q_unique: avoids list searches
        for i, q in enumerate(qscaled):
            pos = q_position.get(q)
            if pos is None:
                q_position[q] = len(q_unique)
                q_unique.append(q)
                q_degen.append(1)
                hkl_unique.append(hkls[i])
            else:
                q_degen[pos] += 1

        qorder = np.argsort(q_unique)
        qhkls = 1.e-9*np.array(q_unique)[qorder]
        hkls = abs(np.array(hkl_unique)[qorder])
        degen = np.array(q_degen)[qorder]

        # note the f2 is calculated here without resonant corrections
        f2 = self.calculate_f2(hkls, qhkls=qhkls, wavelength=None)

        # filter out very small structure factors
        ffilt = (f2 > 1.e-6*max(f2))
        qhkls = qhkls[ffilt]
        hkls = hkls[ffilt]
        degen = degen[ffilt]
        f2 = f2[ffilt]

        # lorentz and polarization correction
        arad = (TAU/360)*twth_from_q(qhkls, wavelength)
        corr = (1+np.cos(arad)**2)/(np.sin(arad/2)**2*np.cos(arad/2))

        intensity = f2 * degen * corr
        ifilt = (intensity > 0.005*max(intensity))

        intensity = intensity[ifilt] / max(intensity)
        qhkls = qhkls[ifilt]
        hkls = hkls[ifilt]
        degen = degen[ifilt]

        # indices of peaks in descending order of intensity
        main_peaks = np.argsort(intensity)[::-1][:nmax]

        hkls_main, degen_main = hkls[main_peaks], degen[main_peaks]
        if self.ams_db is not None:
            self.hkls = self.ams_db.set_hkls(self.ams_id, hkls_main, degen_main)

        return hkls_main, degen_main

    def get_structure_factors(self, wavelength=0.75):
        """return structure factors for the stored HKLs and degeneracies

        The HKLs are read from self.hkls (as packed by pack_hkl_degen());
        if that is not set, find_hkls() is run first.

        This is a lot like find_hkls(), but with the assumption that HKLs
        are not to be filtered or altered.

        Arguments:
            wavelength: used for the resonant corrections, two-theta
                        values and the Lorentz and polarization correction

        returns a StructureFactor namedtuple, or None if pymatgen could
        not parse the structure.
        """
        hkls = degen = None
        if self.hkls is None:
            found = self.find_hkls(nmax=64, qmax=10, wavelength=wavelength)
            if found is None:
                # find_hkls() has already reported the parsing failure
                return
            # find_hkls() stores self.hkls only when a database is attached:
            # without one, use the arrays it returned directly.
            if self.hkls is None:
                hkls, degen = found

        if hkls is None:
            hkls, degen = unpack_hkl_degen(self.hkls)

        self.get_pmg_struct()
        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return

        unitcell = self.get_unitcell()
        dhkls = d_from_hkl(hkls, **unitcell)
        qhkls = TAU / dhkls

        # sort by q
        qsort = np.argsort(qhkls)
        qhkls = qhkls[qsort]
        dhkls = dhkls[qsort]
        hkls = hkls[qsort]
        degen = degen[qsort]

        energy = E_from_lambda(wavelength, E_units='eV')

        f2hkl = self.calculate_f2(hkls, qhkls=qhkls, wavelength=wavelength)

        # lorentz and polarization correction
        twoth = twth_from_q(qhkls, wavelength)
        arad = (TAU/360)*twoth
        corr = (1+np.cos(arad)**2)/(np.sin(arad/2)**2*np.cos(arad/2))

        intensity = f2hkl * degen * corr

        return StructureFactor(q=qhkls, intensity=intensity, hkl=hkls, d=dhkls,
                               f2hkl=f2hkl, twotheta=twoth, degen=degen,
                               lorentz=corr, wavelength=wavelength,
                               energy=energy)

    def calculate_f2(self, hkls, qhkls=None, energy=None, wavelength=None):
        """calculate F*F'.

        If wavelength (in Ang) or energy (in eV) is not None, then
        resonant corrections will be included.

        Arguments:
            hkls: array of (H, K, L) values
            qhkls: q values for hkls; calculated from the unit cell if None
            energy: energy in eV for resonant corrections
            wavelength: wavelength in Ang, used if energy is None

        returns array with one value per hkl.
        """
        if qhkls is None:
            unitcell = self.get_unitcell()
            qhkls = TAU / d_from_hkl(hkls, **unitcell)
        sq = qhkls/(2*TAU)
        sites = self.get_sites()

        if energy is None and wavelength is not None:
            energy = E_from_lambda(wavelength, E_units='eV')

        # get f0 and resonant scattering factors, once per element
        f0vals, f1vals, f2vals = {}, {}, {}
        for elem in sites:
            f0vals[elem] = f0(elem, sq)
            if energy is not None:
                f1vals[elem] = f1_chantler(elem, energy)
                f2vals[elem] = f2_chantler(elem, energy)

        # and f2: sum over all sites of each element, for each hkl
        f2 = np.zeros(len(hkls))
        for i, hkl in enumerate(hkls):
            fsum = 0.
            for elem in f0vals:
                fval = f0vals[elem][i]
                if energy is not None:
                    fval += f1vals[elem] - 1j*f2vals[elem]
                for occu, fcoord in sites[elem]:
                    fsum += fval*occu*np.exp(1j*TAU*(fcoord*hkl).sum())
            f2[i] = (fsum*fsum.conjugate()).real
        return f2

    def get_pmg_struct(self):
        """parse the CIF text with pymatgen, setting pmg_cstruct and
        pmg_pstruct; does nothing if both are already set.

        On failure a message is printed and the attributes are left as
        they were.
        """
        if self.pmg_cstruct is not None and self.pmg_pstruct is not None:
            return

        try:
            pmcif = CifParser(StringIO(self.ciftext), **PMG_CIF_OPTS)
            self.pmg_cstruct = pmcif.get_structures()[0]
            self.pmg_pstruct = SpacegroupAnalyzer(self.pmg_cstruct
                                                  ).get_conventional_standard_structure()
        except Exception:
            # best-effort: callers test pmg_pstruct for None.  'Exception'
            # so that KeyboardInterrupt / SystemExit are not swallowed.
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")

    def get_unitcell(self):
        "unitcell as dict, from PMG structure"
        self.get_pmg_struct()
        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return
        lattice = pstruct.as_dict()['lattice']
        return {key: lattice[key]
                for key in ('a', 'b', 'c', 'alpha', 'beta', 'gamma', 'volume')}

    def get_sites(self):
        """dictionary of sites, from PMG structure

        keys are element symbols, values are lists of
        (occupancy, fractional coordinates) tuples.
        """
        self.get_pmg_struct()
        pstruct = self.pmg_pstruct
        if pstruct is None:
            print(f"pymatgen could not parse CIF structure for CIF {self.ams_id}")
            return

        # symbols replaced by another element before use
        # NOTE(review): presumably site labels that pymatgen read as a
        # different element -- confirm
        aliases = {'Nh': 'N', 'Og': 'O', 'Hs': 'H', 'D': 'H', 'Fl': 'F'}

        sites = {}
        for site in pstruct.sites:
            sdat = site.as_dict()
            fcoords = sdat['abc']

            for spec in sdat['species']:
                elem = spec['element']
                elem = aliases.get(elem, elem)
                if elem.startswith(('Dh', 'Dd', 'Dw')):
                    elem = 'H'
                # always store tuples, for the first and for later sites alike
                sites.setdefault(elem, []).append((spec['occu'], fcoords))
        return sites

    def get_feffinp(self, absorber, edge=None, cluster_size=8.0, absorber_site=1,
                    with_h=False, version8=True):
        """text of a Feff input file for this structure

        Title lines describing the AMCSD entry are built here; all other
        arguments are passed on to cif2feffinp().
        """
        pub = self.publication
        journal = f"{pub.journalname} {pub.volume}, pp. {pub.page_first}-{pub.page_last} ({pub.year:d})"
        authors = ', '.join(pub.authors)
        titles = [f'Structure from AMCSD, AMS_ID: {self.ams_id:d}',
                  f'Mineral Name: {self.mineral.name:s}']

        if not self.formula_title.startswith('<missing'):
            titles.append(f'Formula Title: {self.formula_title}')

        titles.extend([f'Journal: {journal}', f'Authors: {authors}'])
        if not self.pub_title.startswith('<missing'):
            for i, line in enumerate(self.pub_title.split('\n')):
                titles.append(f'Title{i+1:d}: {line}')

        return cif2feffinp(self.ciftext, absorber, edge=edge,
                           cluster_size=cluster_size, with_h=with_h,
                           absorber_site=absorber_site,
                           extra_titles=titles, version8=version8)

    def save_feffinp(self, absorber, edge=None, cluster_size=8.0, absorber_site=1,
                     filename=None, version8=True):
        """write a Feff input file for this structure, return the file name

        If filename is None, the file is 'feff.inp' in a folder below
        <user_larchdir>/feff that is named from the absorber, the edge
        (when given), the mineral name (when known) and the AMCSD id;
        the folder is created with mkdir().
        """
        feff6text = self.get_feffinp(absorber, edge=edge, cluster_size=cluster_size,
                                     absorber_site=absorber_site, version8=version8)
        if filename is None:
            parts = [absorber]
            # edge defaults to None, which must not be formatted into the name
            if edge is not None:
                parts.append(edge)
            # the name is lower-cased, so compare with lower-case markers
            min_name = self.mineral.name.lower()
            if min_name not in ('', '<missing>', 'none'):
                parts.append(min_name)
            parts.append(f'CIF{self.ams_id:06d}')
            name = '_'.join(parts)

            ffolder = os.path.join(user_larchdir, 'feff', name)
            mkdir(ffolder)
            filename = os.path.join(ffolder, 'feff.inp')
        with open(filename, 'w', encoding=sys.getdefaultencoding()) as fh:
            fh.write(feff6text)
        return filename
647class AMCSD():
648 """
649 Database of CIF structure data from the American Mineralogical Crystal Structure Database
651 http://rruff.geo.arizona.edu/AMS/amcsd.php
653 """
654 def __init__(self, dbname=None, read_only=False):
655 "connect to an existing database"
656 if dbname is None:
657 parent, _ = os.path.split(__file__)
658 dbname = os.path.join(parent, AMCSD_TRIM)
659 if not os.path.exists(dbname):
660 raise IOError("Database '%s' not found!" % dbname)
662 if not isAMCSD(dbname):
663 raise ValueError("'%s' is not a valid AMCSD Database!" % dbname)
665 self.connect(dbname, read_only=read_only)
666 atexit.register(self.finalize_amcsd)
667 ciftab = self.tables['cif']
668 for colname in CIF_TEXTCOLUMNS:
669 if colname not in ciftab.columns and not read_only:
670 self.session.execute(text(f'alter table cif add column {colname} text'))
671 self.close()
672 self.connect(dbname, read_only=read_only)
673 time.sleep(0.1)
674 self.insert('version', tag=f'with {colname}', date=isotime(),
675 notes=f'added {colname} column to cif table')
677 def finalize_amcsd(self):
678 conn = getattr(self, 'conn', None)
679 if conn is not None:
680 conn.close()
682 def connect(self, dbname, read_only=False):
683 self.dbname = dbname
684 self.engine = make_engine(dbname)
685 self.conn = self.engine.connect()
686 kwargs = {'bind': self.engine, 'autoflush': True, 'autocommit': False}
687 self.session = sessionmaker(**kwargs)()
688 if read_only:
689 def readonly_flush(*args, **kwargs):
690 return
691 self.session.flush = readonly_flush
693 self.metadata = MetaData()
694 self.metadata.reflect(bind=self.engine)
695 self.tables = self.metadata.tables
696 self.cif_elems = None
698 def close(self):
699 "close session"
700 self.session.flush()
701 self.session.close()
703 def query(self, *args, **kws):
704 "generic query"
705 return self.session.query(*args, **kws)
707 def insert(self, tablename, **kws):
708 if isinstance(tablename, Table):
709 table = tablename
710 else:
711 table = self.tables[tablename]
712 stmt = table.insert().values(kws)
713 out = self.session.execute(stmt)
714 self.session.commit()
715 self.session.flush()
717 def update(self, tablename, whereclause=False, **kws):
718 if isinstance(tablename, Table):
719 table = tablename
720 else:
721 table = self.tables[tablename]
723 stmt = table.update().where(whereclause).values(kws)
724 out = self.session.execute(stmt)
725 self.session.commit()
726 self.session.flush()
728 def execall(self, query):
729 return self.session.execute(query).fetchall()
731 def execone(self, query):
732 results = self.session.execute(query).fetchone()
733 if results is None or len(results) < 1:
734 return None
735 return results
737 def get_all(self, tablename):
738 return self.execall(self.tables[tablename].select())
741 def get_version(self, long=False, with_history=False):
742 """
743 return sqlite3 database and python library version numbers
745 Parameters:
746 long (bool): show timestamp and notes of latest version [False]
747 with_history (bool): show complete version history [False]
749 Returns:
750 string: version information
751 """
752 out = []
753 rows = self.get_all('version')
754 if not with_history:
755 rows = rows[-1:]
756 if long or with_history:
757 for row in rows:
758 out.append(f"AMCSD Version: {row.tag} [{row.date}] '{row.notes}'")
759 out.append(f"Python Version: {__version__}")
760 out = "\n".join(out)
761 elif rows is None:
762 out = f"AMCSD Version: unknown, Python Version: {__version__}"
763 else:
764 out = f"AMCSD Version: {rows[0].tag}, Python Version: {__version__}"
765 return out
767 def _get_tablerow(self, table, name, add=True):
768 tab = self.tables[table]
769 if '"' in name:
770 name = name.replace('"', '\"')
771 rows = self.execall(tab.select().where(tab.c.name==name))
772 if len(rows) == 0:
773 if not add:
774 return None
775 self.insert(tab, name=name)
776 rows = self.execall(tab.select().where(tab.c.name==name))
777 return rows[0]
779 def get_spacegroup(self, hm_name):
780 """get row from spacegroups table by HM notation. See add_spacegroup()
781 """
782 tab = self.tables['spacegroups']
783 rows = self.execall(tab.select().where(tab.c.hm_notation==hm_name))
784 if len(rows) >0:
785 return rows[0]
786 return None
789 def add_spacegroup(self, hm_name, symmetry_xyz, category=None):
790 """add entry to spacegroups table, including HM notation and CIF symmetry operations
791 """
792 sg = self.get_spacegroup(hm_name)
793 if sg is not None and sg.symmetry_xyz == symmetry_xyz:
794 return sg
796 args = {'hm_notation': hm_name, 'symmetry_xyz': symmetry_xyz}
797 if category is not None:
798 args['category'] = category
799 self.insert('spacegroups', **args)
800 return self.get_spacegroup(hm_name)
802 def get_publications(self, journalname=None, year=None, volume=None,
803 page_first=None, page_last=None, id=None):
804 """get rows from publications table by journalname, year (required)
805 and optionally volume, page_first, or page_last.
806 """
807 tab = self.tables['publications']
809 args = []
810 if journalname is not None:
811 args.append(func.lower(tab.c.journalname)==journalname.lower())
812 if year is not None:
813 args.append(tab.c.year==int(year))
814 if volume is not None:
815 args.append(tab.c.volume==str(volume))
816 if page_first is not None:
817 args.append(tab.c.page_first==str(page_first))
818 if page_last is not None:
819 args.append(tab.c.page_last==str(page_last))
820 if id is not None:
821 args.append(tab.c.id==id)
823 rows = self.execall(tab.select().where(and_(*args)))
824 if len(rows) > 0:
825 out = []
826 authtab = self.tables['authors']
827 patab = self.tables['publication_authors']
828 for row in rows:
829 q = select(authtab.c.name).where(and_(authtab.c.id==patab.c.author_id,
830 patab.c.publication_id==row.id))
831 authors = tuple([i[0] for i in self.execall(q)])
832 out.append(CifPublication(row.id, row.journalname, row.year,
833 row.volume, row.page_first,
834 row.page_last, authors))
835 return out
836 return None
839 def add_publication(self, journalname, year, authorlist, volume=None,
840 page_first=None, page_last=None, with_authors=True):
842 args = dict(journalname=journalname, year=year)
843 if volume is not None:
844 args['volume'] = volume
845 if page_first is not None:
846 args['page_first'] = page_first
847 if page_last is not None:
848 args['page_last'] = page_last
850 self.insert('publications', **args)
851 self.session.flush()
852 pub = self.get_publications(journalname, year, volume=volume,
853 page_first=page_first,
854 page_last=page_last)[0]
856 if with_authors:
857 for name in authorlist:
858 auth = self._get_tablerow('authors', name, add=True)
859 self.insert('publication_authors',
860 publication_id=pub.id, author_id=auth.id)
861 return pub
863 def add_cifdata(self, cif_id, mineral_id, publication_id,
864 spacegroup_id, formula=None, compound=None,
865 formula_title=None, pub_title=None, a=None, b=None,
866 c=None, alpha=None, beta=None, gamma=None, url='',
867 cell_volume=None, crystal_density=None,
868 atoms_sites=None, atoms_x=None, atoms_y=None,
869 atoms_z=None, atoms_occupancy=None, atoms_u_iso=None,
870 atoms_aniso_label=None, atoms_aniso_u11=None,
871 atoms_aniso_u22=None, atoms_aniso_u33=None,
872 atoms_aniso_u12=None, atoms_aniso_u13=None,
873 atoms_aniso_u23=None, with_elements=True):
875 self.insert('cif', id=cif_id, mineral_id=mineral_id,
876 publication_id=publication_id,
877 spacegroup_id=spacegroup_id,
878 formula_title=formula_title, pub_title=pub_title,
879 formula=formula, compound=compound, url=url, a=a, b=b,
880 c=c, alpha=alpha, beta=beta, gamma=gamma,
881 cell_volume=cell_volume,
882 crystal_density=crystal_density,
883 atoms_sites=atoms_sites, atoms_x=atoms_x,
884 atoms_y=atoms_y, atoms_z=atoms_z,
885 atoms_occupancy=atoms_occupancy,
886 atoms_u_iso=atoms_u_iso,
887 atoms_aniso_label=atoms_aniso_label,
888 atoms_aniso_u11=atoms_aniso_u11,
889 atoms_aniso_u22=atoms_aniso_u22,
890 atoms_aniso_u33=atoms_aniso_u33,
891 atoms_aniso_u12=atoms_aniso_u12,
892 atoms_aniso_u13=atoms_aniso_u13,
893 atoms_aniso_u23=atoms_aniso_u23)
895 if with_elements:
896 for element in chemparse(formula).keys():
897 self.insert('cif_elements', cif_id=cif_id, element=element)
898 return self.get_cif(cif_id)
901 def add_ciffile(self, filename, cif_id=None, url='', debug=False):
903 if CifParser is None:
904 raise ValueError("CifParser from pymatgen not available. Try 'pip install pymatgen'.")
905 try:
906 dat, formula, symm_xyz = parse_cif_file(filename)
907 except:
908 raise ValueError(f"unknown error trying to parse CIF file: {filename}")
910 # compound
911 compound = '<missing>'
912 for compname in ('_chemical_compound_source',
913 '_chemical_name_systematic',
914 '_chemical_name_common'):
915 if compname in dat:
916 compound = dat[compname]
919 # spacegroup
920 sgroup_name = dat.get('_symmetry_space_group_name_H-M', None)
921 if sgroup_name is None:
922 for key, val in dat.items():
923 if 'space_group' in key and 'H-M' in key:
924 sgroup_name = val
926 sgroup = self.get_spacegroup(sgroup_name)
927 if sgroup is not None and sgroup.symmetry_xyz != symm_xyz:
928 for i in range(1, 11):
929 tgroup_name = sgroup_name + f' %var{i:d}%'
930 sgroup = self.get_spacegroup(tgroup_name)
931 if sgroup is None or sgroup.symmetry_xyz == symm_xyz:
932 sgroup_name = tgroup_name
933 break
934 if sgroup is None:
935 sgroup = self.add_spacegroup(sgroup_name, symm_xyz)
937 min_name = '<missing>'
938 for mname in ('_chemical_name_mineral',
939 '_chemical_name_common'):
940 if mname in dat:
941 min_name = dat[mname]
942 mineral = self._get_tablerow('minerals', min_name)
944 # get publication data (including ISCD style of 'citation' in place of 'journal' )
945 pubdict = dict(journalname=dat.get('_journal_name_full', None),
946 year=dat.get('_journal_year', None),
947 volume=dat.get('_journal_volume', None),
948 page_first=dat.get('_journal_page_first', None),
949 page_last=dat.get('_journal_page_last', None))
951 for key, alt, dval in (('journalname', 'journal_full', 'No Journal'),
952 ('year', None, -1),
953 ('volume', 'journal_volume', 0),
954 ('page_first', None, 0),
955 ('page_last', None, 0)):
956 if pubdict[key] is None:
957 if alt is None:
958 alt = key
959 alt = '_citation_%s' % alt
960 pubdict[key] = dat.get(alt, [dval])[0]
961 authors = dat.get('_publ_author_name', None)
962 if authors is None:
963 authors = dat.get('_citation_author_name', ['Anonymous'])
965 pubs = self.get_publications(**pubdict)
966 if pubs is None:
967 pub = self.add_publication(pubdict['journalname'],
968 pubdict['year'], authors,
969 volume=pubdict['volume'],
970 page_first=pubdict['page_first'],
971 page_last=pubdict['page_last'])
972 else:
973 pub = pubs[0]
975 density = dat.get('_exptl_crystal_density_meas', None)
976 if density is None:
977 density = dat.get('_exptl_crystal_density_diffrn', -1.0)
979 if cif_id is None:
980 cif_id = dat.get('_database_code_amcsd', None)
981 if cif_id is None:
982 cif_id = dat.get('_cod_database_code', None)
983 if cif_id is None:
984 cif_id = self.next_cif_id()
985 cif_id = int(cif_id)
987 # check again for this cif id (must match CIF AMS id and formula
988 tabcif = self.tables['cif']
989 this = self.execone(select(tabcif.c.id, tabcif.c.formula
990 ).where(tabcif.c.id==int(cif_id)))
991 if this is not None:
992 _cid, _formula = this
993 if formula.replace(' ', '') == _formula.replace(' ', ''):
994 return cif_id
995 else:
996 cif_id = self.next_cif_id()
998 if debug:
999 print("##CIF Would add Cif Data !" )
1000 print(cif_id, mineral.id, pub.id, sgroup.id)
1001 print("##CIF formuala / compound: ", formula, compound)
1002 print("titles: ",
1003 dat.get('_amcsd_formula_title', '<missing>'),
1004 dat.get('_publ_section_title', '<missing>'))
1005 print("##CIF atom sites :", json.dumps(dat['_atom_site_label']))
1006 print("##CIF locations : ",
1007 put_optarray(dat, '_atom_site_fract_x'),
1008 put_optarray(dat, '_atom_site_fract_y'),
1009 put_optarray(dat, '_atom_site_fract_z'),
1010 put_optarray(dat, '_atom_site_occupancy'),
1011 put_optarray(dat, '_atom_site_U_iso_or_equiv'))
1012 print("##CIF aniso label : ",
1013 json.dumps(dat.get('_atom_site_aniso_label', '<missing>')))
1014 print("##CIF aniso : ",
1015 put_optarray(dat, '_atom_site_aniso_U_11'),
1016 put_optarray(dat, '_atom_site_aniso_U_22'),
1017 put_optarray(dat, '_atom_site_aniso_U_33'),
1018 put_optarray(dat, '_atom_site_aniso_U_12'),
1019 put_optarray(dat, '_atom_site_aniso_U_13'),
1020 put_optarray(dat, '_atom_site_aniso_U_23'))
1021 print('##CIF cell data: ', dat['_cell_length_a'],
1022 dat['_cell_length_b'],
1023 dat['_cell_length_c'],
1024 dat['_cell_angle_alpha'],
1025 dat['_cell_angle_beta'],
1026 dat['_cell_angle_gamma'])
1027 print("##CIF volume/ density ", dat.get('_cell_volume', -1), density)
1028 print("##CIF url : ", type(url), url)
1030 self.add_cifdata(cif_id, mineral.id, pub.id, sgroup.id,
1031 formula=formula, compound=compound,
1032 formula_title=dat.get('_amcsd_formula_title', '<missing>'),
1033 pub_title=dat.get('_publ_section_title', '<missing>'),
1034 atoms_sites=json.dumps(dat['_atom_site_label']),
1035 atoms_x=put_optarray(dat, '_atom_site_fract_x'),
1036 atoms_y=put_optarray(dat, '_atom_site_fract_y'),
1037 atoms_z=put_optarray(dat, '_atom_site_fract_z'),
1038 atoms_occupancy=put_optarray(dat, '_atom_site_occupancy'),
1039 atoms_u_iso=put_optarray(dat, '_atom_site_U_iso_or_equiv'),
1040 atoms_aniso_label=json.dumps(dat.get('_atom_site_aniso_label', '<missing>')),
1041 atoms_aniso_u11=put_optarray(dat, '_atom_site_aniso_U_11'),
1042 atoms_aniso_u22=put_optarray(dat, '_atom_site_aniso_U_22'),
1043 atoms_aniso_u33=put_optarray(dat, '_atom_site_aniso_U_33'),
1044 atoms_aniso_u12=put_optarray(dat, '_atom_site_aniso_U_12'),
1045 atoms_aniso_u13=put_optarray(dat, '_atom_site_aniso_U_13'),
1046 atoms_aniso_u23=put_optarray(dat, '_atom_site_aniso_U_23'),
1047 a=dat['_cell_length_a'],
1048 b=dat['_cell_length_b'],
1049 c=dat['_cell_length_c'],
1050 alpha=dat['_cell_angle_alpha'],
1051 beta=dat['_cell_angle_beta'],
1052 gamma=dat['_cell_angle_gamma'],
1053 cell_volume=dat.get('_cell_volume', -1),
1054 crystal_density=density,
1055 url=url)
1056 return cif_id
def get_cif(self, cif_id, as_strings=False):
    """Return the CifStructure for `cif_id`, or None if no such row exists.

    Arguments:
        cif_id:      value matched against the `id` column of the 'cif' table.
        as_strings:  if False (default), cell parameters and per-atom arrays
                     are converted to floats where possible; if True, they
                     are kept as returned by the database / get_optarray().

    Returns:
        a CifStructure with publication, mineral, spacegroup, cell and
        atom attributes filled in, or None when `cif_id` is not found.

    A per-atom array that cannot be parsed is reported with print() and
    that attribute is simply not set on the result.
    """
    tab = self.tables['cif']

    cif = self.execone(tab.select().where(tab.c.id==cif_id))
    if cif is None:
        return None

    tab_min = self.tables['minerals']
    tab_sp = self.tables['spacegroups']
    mineral = self.execone(tab_min.select().where(tab_min.c.id==cif.mineral_id))
    sgroup = self.execone(tab_sp.select().where(tab_sp.c.id==cif.spacegroup_id))

    # keep only the part of the H-M notation before any '%var' marker
    hm_symbol = sgroup.hm_notation
    if '%var' in hm_symbol:
        hm_symbol = hm_symbol.split('%var')[0]

    pub = self.get_publications(id=cif.publication_id)[0]

    out = CifStructure(ams_id=cif_id, publication=pub,
                       mineral=mineral, spacegroup=sgroup,
                       hm_symbol=hm_symbol, ams_db=self)

    for attr in ('formula_title', 'compound', 'formula', 'pub_title'):
        setattr(out, attr, getattr(cif, attr, '<missing>'))

    for attr in ('a', 'b', 'c', 'alpha', 'beta', 'gamma',
                 'cell_volume', 'crystal_density'):
        val = getattr(cif, attr, '-1')
        if not as_strings and val is not None:
            if isinstance(val, str):
                # drop a parenthesised suffix, e.g. '5.43(2)' -> '5.43'
                if '(' in val:
                    val = val.split('(')[0]
                # accept a decimal comma when there is no decimal point
                if ',' in val and '.' not in val:
                    val = val.replace(',', '.')
            try:
                val = float(val)
            except (TypeError, ValueError):
                # leave values that are not numeric unchanged
                pass
        setattr(out, attr, val)

    for attr in ('atoms_sites', 'atoms_aniso_label'):
        val = getattr(cif, attr, '<missing>')
        val = '<missing>' if val in (None, '<missing>') else json.loads(val)
        setattr(out, attr, val)

    if out.atoms_sites not in (None, '<missing>'):
        out.natoms = len(out.atoms_sites)

    for attr in ('atoms_x', 'atoms_y', 'atoms_z', 'atoms_occupancy',
                 'atoms_u_iso', 'atoms_aniso_u11', 'atoms_aniso_u22',
                 'atoms_aniso_u33', 'atoms_aniso_u12',
                 'atoms_aniso_u13', 'atoms_aniso_u23'):
        # reset so that a failure report never shows the previous attribute's value
        val = None
        try:
            val = get_optarray(getattr(cif, attr))
            if val == '0':
                val = None
            elif not as_strings:
                # NOTE(review): '?' and '.' entries are replaced by 2.0,
                # as in the original code -- confirm this placeholder is intended
                val = [2. if v in ('?', '.') else float(v) for v in val]
            setattr(out, attr, val)
        except Exception:
            print(f"could not parse CIF entry for {cif_id} '{attr}': {val} ")

    # per-cif q-values are no longer read; only packed hkls (if the row has them)
    out.hkls = getattr(cif, 'hkls', None)

    return out
def next_cif_id(self):
    """Return the next free CIF id above 200000.

    This is one more than the largest id greater than 200000 found in
    the 'cif' table, or 200001 when there is no such id.
    """
    floor = 200_000
    cif_table = self.tables['cif']
    query = select(cif_table.c.id).where(cif_table.c.id>200000)
    used_ids = [row[0] for row in self.execall(query)]
    return max([floor] + used_ids) + 1
def all_minerals(self):
    """Return the distinct `name` values of the 'minerals' table.

    Names are listed in the order they are first seen in get_all('minerals').
    """
    # dict.fromkeys de-duplicates in one pass while keeping first-seen order
    return list(dict.fromkeys(row.name for row in self.get_all('minerals')))
def all_authors(self):
    """Return the distinct `name` values of the 'authors' table.

    Names are listed in the order they are first seen in get_all('authors').
    """
    # dict.fromkeys de-duplicates in one pass while keeping first-seen order
    return list(dict.fromkeys(row.name for row in self.get_all('authors')))
def all_journals(self):
    """Return the distinct `journalname` values of the 'publications' table.

    Names are listed in the order they are first seen in
    get_all('publications').
    """
    # dict.fromkeys de-duplicates in one pass while keeping first-seen order
    return list(dict.fromkeys(row.journalname
                              for row in self.get_all('publications')))
def get_cif_elems(self):
    """Return a dict mapping integer CIF id to its list of distinct elements.

    The mapping is built from the 'cif_elements' table on first use and
    cached on `self.cif_elems`; later calls return the cached dict.
    """
    if self.cif_elems is not None:
        return self.cif_elems

    elems_by_cif = {}
    for entry in self.get_all('cif_elements'):
        symbols = elems_by_cif.setdefault(int(entry.cif_id), [])
        if entry.element not in symbols:
            symbols.append(entry.element)

    self.cif_elems = elems_by_cif
    return self.cif_elems
def find_cifs(self, id=None, mineral_name=None, author_name=None,
              journal_name=None, contains_elements=None,
              excludes_elements=None, strict_contains=False,
              full_occupancy=False, max_matches=1000):
    """Return a list of CIF Structures matching mineral, publication, or elements.

    Arguments:
        id:                 CIF id; if a structure with this id exists it is
                            returned alone and all other criteria are ignored.
        mineral_name:       mineral name. If it contains '*', '^' or '$' it is
                            used as a case-insensitive regular expression
                            (with '*' meaning '.*'); otherwise it must equal
                            the stored name, ignoring case.
        author_name:        author name, compared ignoring case. Only applied
                            when `mineral_name` is not a pattern.
        journal_name:       journal name: a case-insensitive pattern when
                            `mineral_name` is a pattern, otherwise compared
                            for equality ignoring case.
        contains_elements:  atomic symbols that must all be present.
        excludes_elements:  atomic symbols that must all be absent.
        strict_contains:    with `contains_elements`, exclude every other
                            symbol in ATOM_SYMS (replaces `excludes_elements`).
        full_occupancy:     keep only structures with no occupancy data or
                            with a minimum occupancy above 0.96.
        max_matches:        maximum number of structures returned
                            (None for no limit).

    Returns:
        list of the values returned by self.get_cif() for each matching id.
    """
    if id is not None:
        thiscif = self.get_cif(id)
        if thiscif is not None:
            return [thiscif]

    tabcif = self.tables['cif']
    tabmin = self.tables['minerals']
    tabpub = self.tables['publications']
    tabaut = self.tables['authors']
    tab_ap = self.tables['publication_authors']

    mineral_name = '' if mineral_name is None else mineral_name.strip()

    if mineral_name != '' and any(ch in mineral_name for ch in '*^$'):
        # pattern search on mineral name
        pattern = mineral_name.replace('*', '.*').replace('..*', '.*')
        matches = []
        seen = set()
        for mineral in self.get_all('minerals'):
            if re.search(pattern, mineral.name, flags=re.IGNORECASE) is None:
                continue
            query = select(tabcif.c.id).where(tabcif.c.mineral_id==mineral.id)
            for row in self.execall(query):
                if row[0] not in seen:
                    seen.add(row[0])
                    matches.append(row[0])

        if journal_name not in (None, ''):
            jpattern = journal_name.replace('*', '.*').replace('..*', '.*')
            kept = []
            for cid in matches:
                # fetch the journal name of this CIF's publication in one query;
                # the result is a one-column row, so take its first element
                jrow = self.execone(
                    select(tabpub.c.journalname).where(
                        and_(tabcif.c.id==cid,
                             tabpub.c.id==tabcif.c.publication_id)))
                journal = None if jrow is None else jrow[0]
                if (journal is not None and
                    re.search(jpattern, journal, flags=re.IGNORECASE) is not None):
                    kept.append(cid)
            matches = kept

    else:  # strict mineral name or no mineral name
        args = []
        if mineral_name != '':
            args.append(func.lower(tabmin.c.name)==mineral_name.lower())
            args.append(tabmin.c.id==tabcif.c.mineral_id)

        if journal_name not in (None, ''):
            args.append(func.lower(tabpub.c.journalname)==journal_name.lower())
            args.append(tabpub.c.id==tabcif.c.publication_id)

        if author_name not in (None, ''):
            args.append(func.lower(tabaut.c.name)==author_name.lower())
            args.append(tabcif.c.publication_id==tab_ap.c.publication_id)
            args.append(tabaut.c.id==tab_ap.c.author_id)

        query = select(tabcif.c.id)
        if len(args) > 0:
            query = select(tabcif.c.id).where(and_(*args))
        matches = [row[0] for row in self.execall(query)]
        matches = list(set(matches))

    # filter on elements; a CIF without element entries is treated as
    # containing no elements
    cif_elems = self.get_cif_elems()
    if contains_elements is not None:
        required = list(contains_elements)
        matches = [cid for cid in matches
                   if all(el in cif_elems.get(cid, ()) for el in required)]
        if strict_contains:
            excludes_elements = [sym for sym in ATOM_SYMS
                                 if sym not in required]

    if excludes_elements is not None:
        banned = set(excludes_elements)
        matches = [cid for cid in matches
                   if banned.isdisjoint(cif_elems.get(cid, ()))]

    if full_occupancy:
        good = []
        for cif_id in matches:
            cif = self.execone(tabcif.select().where(tabcif.c.id==cif_id))
            occ = get_optarray(getattr(cif, 'atoms_occupancy'))
            if occ in ('0', 0, None):
                good.append(cif_id)
                continue
            try:
                min_wt = min(float(x) for x in occ)
            except (TypeError, ValueError):
                # occupancies that cannot be read as numbers count as partial
                min_wt = 0
            if min_wt > 0.96:
                good.append(cif_id)
        matches = good

    if max_matches is not None:
        matches = matches[:max_matches]
    return [self.get_cif(cid) for cid in matches]
def set_hkls(self, cifid, hkls, degens):
    """Pack `hkls` and `degens` and save them for one CIF.

    The value from pack_hkl_degen(hkls, degens) is passed to self.update()
    as `hkls` for the 'cif' table where id == cifid, and is returned.
    """
    cif_table = self.tables['cif']
    packed = pack_hkl_degen(hkls, degens)
    self.update(cif_table, whereclause=(cif_table.c.id == cifid), hkls=packed)
    return packed
def get_amcsd(download_full=True, timeout=30):
    """Return an instance of the AMCSD CIF Database.

    The instance is cached in the module-level `_CIFDB`, so later calls
    return the same object.  If the full database file (AMCSD_FULL in
    `user_larchdir`) does not exist and `download_full` is True, each URL
    in SOURCE_URLS is tried in turn until one responds with status 200.

    Arguments:
        download_full:  whether to try downloading the full database when
                        it is not already present [True]
        timeout:        timeout in seconds for each download request [30]

    Returns:
        AMCSD database.  When the full database is neither present nor
        downloaded, `AMCSD()` with no arguments is returned and is not
        cached (presumably the trimmed database -- confirm against AMCSD).

    Example:
        >>> db = get_amcsd()
        >>> cif = db.get_cif(300)
    """
    global _CIFDB
    if _CIFDB is not None:
        return _CIFDB

    dbfull = os.path.join(user_larchdir, AMCSD_FULL)
    if os.path.exists(dbfull):
        _CIFDB = AMCSD(dbfull)
        return _CIFDB

    if download_full:
        t0 = time.time()
        requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
        content = None
        for src in SOURCE_URLS:
            url = f"{src:s}/{AMCSD_FULL:s}"
            try:
                req = requests.get(url, verify=True, timeout=timeout)
            except requests.exceptions.RequestException as exc:
                # a failing mirror should not prevent trying the next one
                print(f"could not download {url}: {exc}")
                continue
            if req.status_code == 200:
                content = req.content
                break

        if content is not None:
            # write to a temporary name first, so that an interrupted write
            # never leaves a partial file where the database is expected
            tmpname = dbfull + '.part'
            with open(tmpname, 'wb') as fh:
                fh.write(content)
            os.replace(tmpname, dbfull)
            print("Downloaded  %s : %.2f sec" % (dbfull, time.time()-t0))
            time.sleep(0.25)
            _CIFDB = AMCSD(dbfull)
            return _CIFDB

    # finally download of full must have failed
    return AMCSD()
def get_cif(ams_id):
    """
    look up a CIF Structure by AMS ID in the database from get_amcsd()
    """
    return get_amcsd().get_cif(ams_id)
def find_cifs(mineral_name=None, journal_name=None, author_name=None,
              contains_elements=None, excludes_elements=None,
              strict_contains=False, full_occupancy=False,
              max_matches=1000):
    """
    return a list of CIF Structures matching a set of criteria,
    using the database returned by get_amcsd():

     mineral_name:       case-insensitive match of mineral name
     journal_name:       journal name to match
     author_name:        author name to match
     contains_elements:  list of atomic symbols required to be in structure
     excludes_elements:  list of atomic symbols required to NOT be in structure
     strict_contains:    `contains_elements` is complete -- no other elements
     full_occupancy:     passed to the database search to filter on
                         site occupancy
     max_matches:        maximum number of structures to return [1000]

    All arguments are passed unchanged to the `find_cifs()` method of the
    database, which defines the exact matching rules.
    """
    db = get_amcsd()
    return db.find_cifs(mineral_name=mineral_name,
                        journal_name=journal_name,
                        author_name=author_name,
                        contains_elements=contains_elements,
                        excludes_elements=excludes_elements,
                        strict_contains=strict_contains,
                        full_occupancy=full_occupancy,
                        max_matches=max_matches)