Coverage for /Users/Newville/Codes/xraylarch/larch/xrd/cifdb.py: 10%

754 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2''' 

3build American Mineralogist Crystal Structure Database (amcsd)

4''' 

5 

6import os 

7import requests 

8import numpy as np 

9from itertools import groupby 

10import larch 

11from .xrd_fitting import peaklocater 

12from .xrd_cif import create_xrdcif, SPACEGROUPS 

13from .xrd_tools import lambda_from_E 

14 

15import json 

16from larch.utils.jsonutils import encode4js, decode4js 

17 

18from sqlalchemy import (create_engine, MetaData, Table, Column, Integer, 

19 String, Unicode, PrimaryKeyConstraint, 

20 ForeignKeyConstraint, ForeignKey, Numeric, func, 

21 and_, or_, not_, tuple_) 

22 

23from sqlalchemy.ext.declarative import declarative_base 

24from sqlalchemy.orm import sessionmaker, relationship 

25from sqlalchemy.pool import SingletonThreadPool 

26 

# Crystal systems in order of increasing space-group number.  When the
# database is built, a system's position in this list (plus one) is used
# as its symmetry id.
SYMMETRIES = [
    'triclinic',
    'monoclinic',
    'orthorhombic',
    'tetragonal',
    'trigonal',
    'hexagonal',
    'cubic',
]

29 

# 'Name:Symbol' pairs in order of atomic number, starting at Z = 1.
_ELEMENT_TABLE = '''
Hydrogen:H Helium:He Lithium:Li Beryllium:Be Boron:B
Carbon:C Nitrogen:N Oxygen:O Fluorine:F Neon:Ne
Sodium:Na Magnesium:Mg Aluminum:Al Silicon:Si Phosphorus:P
Sulfur:S Chlorine:Cl Argon:Ar Potassium:K Calcium:Ca
Scandium:Sc Titanium:Ti Vanadium:V Chromium:Cr Manganese:Mn
Iron:Fe Cobalt:Co Nickel:Ni Copper:Cu Zinc:Zn
Gallium:Ga Germanium:Ge Arsenic:As Selenium:Se Bromine:Br
Krypton:Kr Rubidium:Rb Strontium:Sr Yttrium:Y Zirconium:Zr
Niobium:Nb Molybdenum:Mo Technetium:Tc Ruthenium:Ru Rhodium:Rh
Palladium:Pd Silver:Ag Cadmium:Cd Indium:In Tin:Sn
Antimony:Sb Tellurium:Te Iodine:I Xenon:Xe Cesium:Cs
Barium:Ba Lanthanum:La Cerium:Ce Praseodymium:Pr Neodymium:Nd
Promethium:Pm Samarium:Sm Europium:Eu Gadolinium:Gd Terbium:Tb
Dysprosium:Dy Holmium:Ho Erbium:Er Thulium:Tm Ytterbium:Yb
Lutetium:Lu Hafnium:Hf Tantalum:Ta Tungsten:W Rhenium:Re
Osmium:Os Iridium:Ir Platinum:Pt Gold:Au Mercury:Hg
Thallium:Tl Lead:Pb Bismuth:Bi Polonium:Po Astatine:At
Radon:Rn Francium:Fr Radium:Ra Actinium:Ac Thorium:Th
Protactinium:Pa Uranium:U Neptunium:Np Plutonium:Pu Americium:Am
Curium:Cm Berkelium:Bk Californium:Cf Einsteinium:Es Fermium:Fm
Mendelevium:Md Nobelium:No Lawrencium:Lr Rutherfordium:Rf Dubnium:Db
Seaborgium:Sg Bohrium:Bh Hassium:Hs Meitnerium:Mt Darmstadtium:Ds
Roentgenium:Rg Ununbium:Uub Ununtrium:Uut Ununquadium:Uuq Ununpentium:Uup
Ununhexium:Uuh Ununseptium:Uus Ununoctium:Uuo
'''

# One [atomic number (as a string), element name, element symbol] entry
# per element, Z = 1 ... 118.
ELEMENTS = [[str(z)] + pair.split(':')
            for z, pair in enumerate(_ELEMENT_TABLE.split(), start=1)]

70 

# Names used to seed the category look-up table of a new database.
CATEGORIES = ['soil', 'salt', 'clay']

# Regular q grid: peak positions are stored as indices into this axis.
QMIN = 0.2
QMAX = 10.0
QSTEP = 0.01
QAXIS = np.arange(QMIN, QMAX+QSTEP, QSTEP)

# Energy (eV) passed to lambda_from_E() when computing q values for a CIF.
ENERGY = 19000 ## units eV

# Cache for the cifDB instance handed out by get_cifdb().
_cifdb = None

82 

def get_cifdb(dbname='amcsd_cif0.db', _larch=None):
    """Return the module-wide cifDB instance, creating it on first use.

    Arguments:
    ----------
    dbname   database file handed to cifDB(); only used on the call that
             creates the cached instance
    _larch   optional larch interpreter; when given, the instance is
             stored in its symbol table as '_xray._cifdb' unless that
             symbol already exists

    Returns:
    --------
    the cached cifDB instance
    """
    global _cifdb
    if _cifdb is None:
        _cifdb = cifDB(dbname=dbname)

    if _larch is None:
        return _cifdb

    symbol = '_xray._cifdb'
    if not _larch.symtable.has_symbol(symbol):
        _larch.symtable.set_symbol(symbol, _cifdb)
    return _cifdb

92 

def make_engine(dbname):
    """Return a SQLAlchemy engine for the SQLite file `dbname`.

    The engine is created with the SingletonThreadPool pool class.
    """
    sqlite_url = 'sqlite:///%s' % (dbname)
    return create_engine(sqlite_url, poolclass=SingletonThreadPool)

96 

def iscifDB(dbname):
    """Test whether `dbname` is a valid CIF database.

    The file must exist, be readable as a SQLite database, and contain
    every table named in `required` below.

    Arguments:
    ----------
    dbname   path of the file to test

    Returns:
    --------
    True if all required tables are present; False otherwise, including
    when the file is missing or cannot be read.
    """
    # 'formtbl', 'compref' and 'qref' are deliberately not required.
    required = ('ciftbl', 'elemtbl', 'nametbl', 'spgptbl', 'symtbl',
                'authtbl', 'qtbl', 'cattbl', 'symref', 'authref', 'catref')
    engine = None
    try:
        # Check first: connecting to a missing path would make SQLite
        # create an empty file as a side effect of this read-only test.
        if not os.path.isfile(dbname):
            return False
        engine = make_engine(dbname)
        meta = MetaData()
        meta.reflect(bind=engine)
        return all(name in meta.tables for name in required)
    except Exception:
        # anything unreadable (bad path type, not a SQLite file, ...)
        # simply is not a valid database
        return False
    finally:
        if engine is not None:
            engine.dispose()

125 

126 

127class cifDB(object): 

128 ''' 

129 interface to the American Mineralogist Crystal Structure Database 

130 ''' 

def __init__(self, dbname=None, read_only=True,verbose=False):
    """Open a CIF database, building a new one if the file is not found.

    `dbname` is looked for first as given and then in the directory of
    this module.  A file that is found must pass iscifDB(), otherwise
    ValueError is raised; if no file is found, create_cifdb() is called
    to build one at the module-directory path.

    Arguments:
    ----------
    dbname      database file name; None selects 'amcsd_cif0.db'
    read_only   if true, the session's flush() is replaced by a no-op
    verbose     if true, print a banner with the database name

    Raises:
    -------
    ValueError  if the file exists but is not a valid CIF database
    """
    ## This needs to be modified for creating new if does not exist.
    self.version = '0.0.2'
    # os.path.exists(None) raises TypeError, so fall back to the same
    # default file name that get_cifdb() uses.
    self.dbname = 'amcsd_cif0.db' if dbname is None else dbname
    if verbose:
        print('\n\n================ %s ================\n' % self.dbname)

    if not os.path.exists(self.dbname):
        # not found as given: try next to this module
        self.dbname = os.path.join(os.path.dirname(__file__), self.dbname)

    if os.path.exists(self.dbname):
        if not iscifDB(self.dbname):
            raise ValueError("'%s' is not a valid cif database file!" % self.dbname)
    else:
        print("File '%s' not found; building a new database!" % self.dbname)
        self.create_cifdb(name=self.dbname)

    self.engine = make_engine(self.dbname)
    self.conn = self.engine.connect()

    session_opts = {}
    if read_only:
        session_opts = {'autoflush': True, 'autocommit': False}
    self.session = sessionmaker(bind=self.engine, **session_opts)()
    if read_only:
        def readonly_flush(*args, **kws):
            return
        self.session.flush = readonly_flush

    self.metadata = MetaData()
    self.metadata.reflect(bind=self.engine)
    self.tables = self.metadata.tables

    ## Load tables
    self.elemtbl = Table('elemtbl', self.metadata)
    self.nametbl = Table('nametbl', self.metadata)
    self.formtbl = Table('formtbl', self.metadata)
    self.spgptbl = Table('spgptbl', self.metadata)
    self.symtbl = Table('symtbl', self.metadata)
    self.authtbl = Table('authtbl', self.metadata)
    self.qtbl = Table('qtbl', self.metadata)
    self.cattbl = Table('cattbl', self.metadata)

    self.symref = Table('symref', self.metadata)
    self.compref = Table('compref', self.metadata)
    self.qref = Table('qref', self.metadata)
    self.authref = Table('authref', self.metadata)
    self.catref = Table('catref', self.metadata)

    self.ciftbl = Table('ciftbl', self.metadata)

    # q values are stored as strings in 'qtbl'; keep them as a float array
    self.axis = np.array([float(q[0]) for q in self.query(self.qtbl.c.q).all()])

185 

186 

def query(self, *args, **kws):
    """Generic query: forward all arguments to the session's query()."""
    session = self.session
    return session.query(*args, **kws)

190 

def close(self):
    """Flush and close the session, then close the database connection.

    The session and the connection are closed even if the flush fails.
    """
    try:
        self.session.flush()
    finally:
        self.session.close()
        # the connection opened when the database was loaded
        conn = getattr(self, 'conn', None)
        if conn is not None:
            conn.close()

195 

def create_cifdb(self,name=None,verbose=False):
    """Create a new CIF database and populate its fixed look-up tables.

    Defines all tables, creates them, then fills the element, symmetry,
    category, q-axis and space-group tables from the module constants
    ELEMENTS, SYMMETRIES, CATEGORIES, QAXIS and SPACEGROUPS.

    Arguments:
    ----------
    name      file name for the new database; if None, the first unused
              name of the form 'amcsd_cif<N>.db' (N = 0, 1, ...) is used
    verbose   if true, report space-group entries that could not be added
    """
    if name is None:
        # The template needs a format field: '%' applied to a string
        # without one raises TypeError.
        template = 'amcsd_cif%i.db'
        counter = 0
        while os.path.exists(template % counter):
            counter += 1
        self.dbname = template % counter
    else:
        self.dbname = name

    # NOTE(review): open_database() is not defined in the visible part of
    # this file; presumably it creates self.metadata bound to the new
    # file -- confirm.
    self.open_database()

    ###################################################
    ## Look up tables
    elemtbl = Table('elemtbl', self.metadata,
                    Column('z', Integer, primary_key=True),
                    Column('element_name', String(40), unique=True, nullable=True),
                    Column('element_symbol', String(2), unique=True, nullable=False)
                    )
    Table('nametbl', self.metadata,
          Column('mineral_id', Integer, primary_key=True),
          Column('mineral_name', String(30), unique=True, nullable=True)
          )
    Table('formtbl', self.metadata,
          Column('formula_id', Integer, primary_key=True),
          Column('formula_name', String(30), unique=True, nullable=True)
          )
    spgptbl = Table('spgptbl', self.metadata,
                    Column('iuc_id', Integer),
                    Column('hm_notation', String(16), unique=True, nullable=True),
                    PrimaryKeyConstraint('iuc_id', 'hm_notation')
                    )
    symtbl = Table('symtbl', self.metadata,
                   Column('symmetry_id', Integer, primary_key=True),
                   Column('symmetry_name', String(16), unique=True, nullable=True)
                   )
    Table('authtbl', self.metadata,
          Column('author_id', Integer, primary_key=True),
          Column('author_name', String(40), unique=True, nullable=True)
          )
    qtbl = Table('qtbl', self.metadata,
                 Column('q_id', Integer, primary_key=True),
                 #Column('q', Float()) ## how to make this work? mkak 2017.02.14
                 Column('q', String())
                 )
    cattbl = Table('cattbl', self.metadata,
                   Column('category_id', Integer, primary_key=True),
                   Column('category_name', String(16), unique=True, nullable=True)
                   )
    ###################################################
    ## Cross-reference tables
    symref = Table('symref', self.metadata,
                   Column('iuc_id', None, ForeignKey('spgptbl.iuc_id')),
                   Column('symmetry_id', None, ForeignKey('symtbl.symmetry_id')),
                   PrimaryKeyConstraint('iuc_id', 'symmetry_id')
                   )
    Table('compref', self.metadata,
          Column('z', None, ForeignKey('elemtbl.z')),
          Column('amcsd_id', None, ForeignKey('ciftbl.amcsd_id')),
          PrimaryKeyConstraint('z', 'amcsd_id')
          )
    Table('qref', self.metadata,
          Column('q_id', None, ForeignKey('qtbl.q_id')),
          Column('amcsd_id', None, ForeignKey('ciftbl.amcsd_id')),
          PrimaryKeyConstraint('q_id', 'amcsd_id')
          )
    Table('authref', self.metadata,
          Column('author_id', None, ForeignKey('authtbl.author_id')),
          Column('amcsd_id', None, ForeignKey('ciftbl.amcsd_id')),
          PrimaryKeyConstraint('author_id', 'amcsd_id')
          )
    Table('catref', self.metadata,
          Column('category_id', None, ForeignKey('cattbl.category_id')),
          Column('amcsd_id', None, ForeignKey('ciftbl.amcsd_id')),
          PrimaryKeyConstraint('category_id', 'amcsd_id')
          )
    ###################################################
    ## Main table
    Table('ciftbl', self.metadata,
          Column('amcsd_id', Integer, primary_key=True),
          Column('mineral_id', Integer),
          Column('formula_id', Integer),
          Column('iuc_id', ForeignKey('spgptbl.iuc_id')),
          Column('a', String(5)),
          Column('b', String(5)),
          Column('c', String(5)),
          Column('alpha', String(5)),
          Column('beta', String(5)),
          Column('gamma', String(5)),
          Column('cif', String(25)), ## , nullable=True
          Column('zstr',String(25)),
          Column('qstr',String(25)),
          Column('url',String(25))
          )
    ###################################################
    ## Add all to file
    self.metadata.create_all() ## if not exists function (callable when exists)

    ###################################################
    ## Populate the fixed tables of the database

    ## Adds all elements into database
    def_elem = elemtbl.insert()
    for z, elem_name, symbol in ELEMENTS:
        def_elem.execute(z=int(z), element_name=elem_name, element_symbol=symbol)

    ## Adds all crystal symmetries, and links each space-group number
    ## (first .. last, inclusive) to its crystal system
    iuc_ranges = {'triclinic': (1, 2),
                  'monoclinic': (3, 15),
                  'orthorhombic': (16, 74),
                  'tetragonal': (75, 142),
                  'trigonal': (143, 167),
                  'hexagonal': (168, 194),
                  'cubic': (195, 230)}
    def_sym = symtbl.insert()
    add_sym = symref.insert()
    # symmetry ids count from 1, in the order the rows are inserted
    for symmetry_id, symmetry in enumerate(SYMMETRIES, start=1):
        sym_name = symmetry.strip()
        def_sym.execute(symmetry_name=sym_name)
        first, last = iuc_ranges.get(sym_name, (1, 0))
        for iuc_id in range(first, last + 1):
            add_sym.execute(iuc_id=iuc_id, symmetry_id=symmetry_id)

    def_cat = cattbl.insert()
    for cat in CATEGORIES:
        def_cat.execute(category_name=cat)

    ## Adds qrange
    def_q = qtbl.insert()
    for q in QAXIS:
        def_q.execute(q='%0.2f' % q)

    ## Adds all space groups
    def_spgp = spgptbl.insert()
    for spgrp_no, spgrp_names in SPACEGROUPS.items():
        for spgrp_name in spgrp_names:
            try:
                def_spgp.execute(iuc_id=spgrp_no, hm_notation=spgrp_name)
            except Exception:
                # 'hm_notation' is unique, so a repeated name is rejected
                if verbose:
                    print('Duplicate: %s %s' % (spgrp_no,spgrp_name))

364 

365 

def __add_space_groups(self):
    """Add space groups from SPACEGROUPS that are missing from 'spgptbl'.

    A space group counts as missing when no row of the table has its
    name in the 'hm_notation' column; each addition is printed.
    """
    table = self.spgptbl
    for number in SPACEGROUPS.keys():
        for hm_name in SPACEGROUPS[number]:
            lookup = table.select(table.c.hm_notation == hm_name)
            found = False
            for _row in lookup.execute():
                found = True
            if found:
                continue
            print('Adding: %s %s' % (number,hm_name))
            table.insert().execute(iuc_id=number,hm_notation=hm_name)

377 

def add_ciffile(self, ciffile, verbose=True, url=False, ijklm=1, file=None):
    """Parse one CIF and store it, with its cross-references, in the database.

    Steps performed:
      i.   read the CIF text (from a local file, or downloaded when
           `url` is true) and parse it with create_xrdcif()
      ii.  take the AMCSD number from the CIF; if it has none, assign
           the first unused number at or above 99999
      iii. return early if that AMCSD number is already in 'ciftbl'
      iv.  find, or add, the mineral name in 'nametbl' and the formula
           in 'formtbl'
      v.   insert the row in 'ciftbl' (ids, unit cell, full CIF text,
           JSON-encoded element and q arrays, source path/URL)
      vi.  fill the cross-reference tables 'qref', 'compref', 'authref'
      vii. print a summary with amcsd_info()

    Arguments:
    ----------
    ciffile   path of a CIF file, or its URL when `url` is true
    verbose   report an already-existing entry or a failed q insert
    url       if true, fetch `ciffile` with requests.get()
    ijklm     unused; kept for backward compatibility
    file      unused; kept for backward compatibility

    Raises:
    -------
    ValueError  if the mineral name or formula cannot be found or added
    """
    def lookup_id(table, name_col, id_col, value):
        # id of the row whose `name_col` equals `value`, or None
        found = None
        search = table.select(getattr(table.c, name_col) == value)
        for row in search.execute():
            found = getattr(row, id_col)
        return found

    def find_or_add(table, name_col, id_col, value):
        # as lookup_id(), but insert `value` first when it is absent
        found = lookup_id(table, name_col, id_col, value)
        if found is None:
            table.insert().execute(**{name_col: value})
            found = lookup_id(table, name_col, id_col, value)
        return found

    if url:
        cifstr = requests.get(ciffile).text
    else:
        with open(ciffile,'rb') as fh:
            cifstr = str(fh.read().decode('utf-8'))
    cif = create_xrdcif(text=cifstr)

    if cif.id_no is None:
        cif_no = 99999
        search_cif = self.query(self.ciftbl.c.amcsd_id).filter(self.ciftbl.c.amcsd_id == cif_no).all()
        cnt_lp = 0
        while len(search_cif) > 0:
            cif_no += 1
            cnt_lp += 1
            search_cif = self.query(self.ciftbl.c.amcsd_id).filter(self.ciftbl.c.amcsd_id == cif_no).all()
            if cnt_lp > 500: ## safe guards against infinite loop
                print(' *** too many loops to find unassigned AMCSD number.')
                return
        print(' *** Assigning unnumbered CIF to AMCSD %i' % cif_no)
        cif.id_no = cif_no

    ## check for amcsd in file already
    ## Find amcsd_id in database
    self.ciftbl = Table('ciftbl', self.metadata)
    search_cif = self.ciftbl.select(self.ciftbl.c.amcsd_id == cif.id_no)
    for row in search_cif.execute():
        if verbose:
            if url:
                print('AMCSD %i already exists in database.\n' % cif.id_no)
            else:
                print('%s: AMCSD %i already exists in database %s.' %
                      (os.path.split(ciffile)[-1],cif.id_no,self.dbname))
        return

    ## Define q-array for each entry at given energy
    qhkl = cif.calc_q(wvlgth=lambda_from_E(ENERGY), q_min=QMIN, q_max=QMAX)
    qarr = self.create_q_array(qhkl)

    ## Find (or add) mineral_name and formula_name
    mineral_id = find_or_add(self.nametbl, 'mineral_name', 'mineral_id', cif.label)
    if mineral_id is None:
        raise ValueError('could not find or add mineral name: %s' % cif.label)
    formula_id = find_or_add(self.formtbl, 'formula_name', 'formula_id', cif.formula)
    if formula_id is None:
        raise ValueError('could not find or add formula: %s' % cif.formula)

    ## Find composition (loop over all elements): one query per element,
    ## reused below for the composition cross-reference table
    element_z = {}
    for element in set(cif.atom.label):
        search_elements = self.elemtbl.select(self.elemtbl.c.element_symbol == element)
        element_z[element] = [row.z for row in search_elements.execute()]
    z_list = [z for zs in element_z.values() for z in zs]
    zarr = self.create_z_array(z_list)

    ## Save CIF entry into database
    self.ciftbl.insert().execute(amcsd_id=cif.id_no,
                                 mineral_id=int(mineral_id),
                                 formula_id=int(formula_id),
                                 iuc_id=cif.symmetry.no,
                                 a=str(cif.unitcell[0]),
                                 b=str(cif.unitcell[1]),
                                 c=str(cif.unitcell[2]),
                                 alpha=str(cif.unitcell[3]),
                                 beta=str(cif.unitcell[4]),
                                 gamma=str(cif.unitcell[5]),
                                 cif=cifstr,
                                 zstr=json.dumps(zarr.tolist(),default=str),
                                 qstr=json.dumps(qarr.tolist(),default=str),
                                 url=str(ciffile))

    ## Build q cross-reference table.  Several reflections can fall in the
    ## same q bin, and a q value may have no bin at all: collect each
    ## matching q_id once instead of re-inserting a stale one.
    q_ids = []
    for q in qhkl:
        qkey = '%0.2f' % (int(q * 100) / 100.)
        q_id = lookup_id(self.qtbl, 'q', 'q_id', qkey)
        if q_id is not None and q_id not in q_ids:
            q_ids.append(q_id)
    add_q = self.qref.insert()
    for q_id in q_ids:
        try:
            add_q.execute(q_id=q_id,amcsd_id=cif.id_no)
        except Exception as exc:
            if verbose:
                print('could not add q_id: %s (amcsd: %i): %s' % (q_id,cif.id_no,exc))

    ## Build composition cross-reference table
    add_comp = self.compref.insert()
    for element, zs in element_z.items():
        if not zs:
            print('could not find element: %s (amcsd: %i)' % (element,cif.id_no))
            continue
        try:
            add_comp.execute(z=zs[-1], amcsd_id=cif.id_no)
        except Exception:
            print('could not add element: %s (amcsd: %i)' % (element,cif.id_no))

    ## Find (or add) author_name
    add_auth = self.authref.insert()
    for author_name in cif.publication.author:
        author_id = find_or_add(self.authtbl, 'author_name', 'author_id', author_name)
        if author_id is not None:
            add_auth.execute(author_id=author_id,
                             amcsd_id=cif.id_no)

    ## categories are not assigned yet ('catref' is left untouched)

    if url:
        self.amcsd_info(cif.id_no, no_qpeaks=np.sum(qarr))
    else:
        self.amcsd_info(cif.id_no, no_qpeaks=np.sum(qarr),ciffile=ciffile)

547 

def url_to_cif(self, url=None, verbose=False, savecif=False, addDB=True,
               all=False, minval=None):
    """Download CIF files by AMCSD number and add them to the database.

    Arguments:
    ----------
    url       URL template with one integer format field for the AMCSD
              number; defaults to the RRUFF AMS download address
    verbose   print progress and failures
    savecif   if true, also write each download to 'amcsd<NNNNN>.cif'
    addDB     if true, add each download to the database with add_ciffile()
    all       if true, try every number from 0
    minval    if given (and `all` is false), start from this number;
              with neither, only 13600-13699 are tried

    Numbers at or above `maxi` and those listed in `exceptions` are
    never requested.
    """
    maxi = 20573
    exceptions = {0,7271,10783,14748,15049,15050,15851,18368,
                  18449,18450,18451,18452,18453,20029}

    ## ALL CAUSE FAILURE IN CIFFILE FUNCTION:
    ## 7271 : author name doubled in cif
    ## 14748 : has label of amcsd code but no number (or anything) assigned
    ## 15049 : page number 'L24307 1' could not be parsed as number
    ## 15050 : page number 'L24307 1' could not be parsed as number
    ## 15851 : no first page number provided despite providing field label
    ## 18368 : non-numerical entries in B_iso fields
    ## 18449 : no first page number provided despite providing field label
    ## 18450 : no first page number provided despite providing field label
    ## 20029 : no volume number provided despite providing field label

    if url is None:
        url = 'http://rruff.geo.arizona.edu/AMS/download.php?id=%05d.cif&down=cif'

    ## Defines url range for searching and adding to cif database
    if all:
        first, last = 0, 99999 ## trolls whole database online
    elif minval is not None:
        first, last = int(minval), 99999 ## starts at given min and counts up
    else:
        first, last = 13600, 13700 ## specifies small range including CeO2 match

    # numbers >= maxi are never fetched, so do not loop over them at all
    for i in range(first, min(last, maxi)):
        if i in exceptions:
            continue
        url_to_scrape = url % i
        r = requests.get(url_to_scrape)
        words = r.text.split()
        # an empty reply, or one starting with "Can't", is a failed download
        if not words or words[0] == "Can't":
            if verbose:
                print('\t---> ERROR on amcsd%05d.cif' % i)
            continue

        if verbose:
            print('Reading %s' % url_to_scrape)
        if savecif:
            fname = 'amcsd%05d.cif' % i
            with open(fname,'w') as fh:
                fh.write(r.text)
            if verbose:
                print('Saved %s' % fname)
        if addDB:
            # best effort: one bad CIF must not stop the scan, but do not
            # swallow KeyboardInterrupt / SystemExit
            try:
                self.add_ciffile(url_to_scrape, url=True, verbose=verbose, ijklm=i)
            except Exception as exc:
                if verbose:
                    print('\t---> could not add amcsd%05d.cif: %s' % (i, exc))

599 

600 

601 

602 

603################################################################################## 

604################################################################################## 

605 

606# usr_qry = self.query(self.ciftbl, 

607# self.elemtbl,self.nametbl,self.spgptbl,self.symtbl, 

608# self.authtbl,self.qtbl,self.cattbl, 

609# self.authref,self.compref,self.catref,self.symref)\ 

610# .filter(self.authref.c.amcsd_id == self.ciftbl.c.amcsd_id)\ 

611# .filter(self.authtbl.c.author_id == self.authref.c.author_id)\ 

612# .filter(self.compref.c.amcsd_id == self.ciftbl.c.amcsd_id)\ 

613# .filter(self.compref.c.z == self.elemtbl.c.z)\ 

614# .filter(self.catref.c.amcsd_id == self.ciftbl.c.amcsd_id)\ 

615# .filter(self.catref.c.category_id == self.cattbl.c.category_id)\ 

616# .filter(self.nametbl.c.mineral_id == self.ciftbl.c.mineral_id)\ 

617# .filter(self.symref.c.symmetry_id == self.symtbl.c.symmetry_id)\ 

618# .filter(self.symref.c.iuc_id == self.spgptbl.c.iuc_id)\ 

619# .filter(self.spgptbl.c.iuc_id == self.ciftbl.c.iuc_id) 

620 

621################################################################################## 

622################################################################################## 

623 

624 

625################################################################################## 

626 

def amcsd_info(self, amcsd_id, no_qpeaks=None, ciffile=None):
    """Print a short summary of one database entry.

    Arguments:
    ----------
    amcsd_id    AMCSD number of the entry
    no_qpeaks   number of q peaks to report; the line is omitted when
                this is None or 0
    ciffile     source file name shown in the header line (optional)
    """
    mineral_id,iuc_id = self.cif_by_amcsd(amcsd_id,only_ids=True)
    mineral_name = self.search_for_mineral(minid=mineral_id)[0].mineral_name
    authors = self.author_by_amcsd(amcsd_id)

    if ciffile:
        print(' ==== File : %s ====' % os.path.split(ciffile)[-1])
    else:
        print(' ===================== ')
    print(' AMCSD: %i' % amcsd_id)
    print(' Name: %s' % mineral_name)
    print(' %s' % self.composition_by_amcsd(amcsd_id))
    try:
        print(' Space Group No.: %s (%s)' % (iuc_id,self.symm_id(iuc_id)))
    except (ValueError, TypeError, AttributeError):
        # the space-group id could not be read as a number
        print(' Space Group No.: %s' % iuc_id)
    if no_qpeaks:
        print(' No. q-peaks in range : %s' % no_qpeaks)

    # only the first word of each author name is shown; blank names
    # are skipped
    first_words = [author.split()[0] for author in authors if author.split()]
    print(' Author(s): ' + ''.join(' %s' % word for word in first_words))
    print(' ===================== ')

653 

def symm_id(self, iuc_id):
    """Return the crystal system name for a space-group number.

    Arguments:
    ----------
    iuc_id   space-group number (1 - 230), as an integer or as a string
             such as '225' or '225:1' (text after ':' is ignored)

    Returns:
    --------
    one of 'triclinic', 'monoclinic', 'orthorhombic', 'tetragonal',
    'trigonal', 'hexagonal', 'cubic'; None if the number is outside
    1 - 230.
    """
    if isinstance(iuc_id, str):
        iuc_id = iuc_id.split(':')[0]
    # int() also accepts integer types that are not `int` subclasses
    iuc_id = int(iuc_id)
    if iuc_id < 1:
        return None

    # (last space-group number of the system, system name)
    systems = ((2, 'triclinic'),       ##   1 -   2 : Triclinic
               (15, 'monoclinic'),     ##   3 -  15 : Monoclinic
               (74, 'orthorhombic'),   ##  16 -  74 : Orthorhombic
               (142, 'tetragonal'),    ##  75 - 142 : Tetragonal
               (167, 'trigonal'),      ## 143 - 167 : Trigonal
               (194, 'hexagonal'),     ## 168 - 194 : Hexagonal
               (230, 'cubic'))         ## 195 - 230 : Cubic
    for last, name in systems:
        if iuc_id <= last:
            return name
    return None

668 

def return_cif(self,amcsd_id):
    """Return the stored CIF text of entry `amcsd_id`, or None if absent."""
    lookup = self.ciftbl.select(self.ciftbl.c.amcsd_id == amcsd_id)
    first = next(iter(lookup.execute()), None)
    if first is None:
        return None
    return first.cif

673 

674################################################################################## 

675 

def all_by_amcsd(self, amcsd_id):
    """Collect the main facts about one entry.

    Returns:
    --------
    tuple of (composition, mineral name, space-group id, authors), as
    given by composition_by_amcsd(), search_for_mineral(), cif_by_amcsd()
    and author_by_amcsd().
    """
    ids = self.cif_by_amcsd(amcsd_id,only_ids=True)
    mineral_id, iuc_id = ids
    minerals = self.search_for_mineral(minid=mineral_id)
    mineral_name = minerals[0].mineral_name
    return (self.composition_by_amcsd(amcsd_id),
            mineral_name,
            iuc_id,
            self.author_by_amcsd(amcsd_id))

685 

def q_by_amcsd(self,amcsd_id,qmin=QMIN,qmax=QMAX):
    """Return the q values of the peaks stored for entry `amcsd_id`.

    The entry's 'qstr' field is a JSON list of flags, one per point of
    self.axis; a flag equal to 1 marks a peak.

    Arguments:
    ----------
    amcsd_id   AMCSD number of the entry
    qmin       smallest q value to return (inclusive)
    qmax       largest q value to return (inclusive)

    Returns:
    --------
    list of q values with qmin <= q <= qmax; empty if the entry is not
    in the database.
    """
    q_results = self.query(self.ciftbl.c.qstr).filter(self.ciftbl.c.amcsd_id == amcsd_id).all()
    if not q_results:
        # unknown id: nothing to report (indexing below would fail)
        return []
    flags = json.loads(q_results[0][0])
    return [self.axis[i] for i,flag in enumerate(flags)
            if flag == 1 and qmin <= self.axis[i] <= qmax]

692 

def author_by_amcsd(self,amcsd_id):
    """Return the list of author names linked to entry `amcsd_id`.

    Each author id found in 'authref' is resolved with
    search_for_author(..., id_no=False); the first field of its first
    result is used as the name.
    """
    refs = self.authref.select(self.authref.c.amcsd_id == amcsd_id)
    return [self.search_for_author(ref.author_id,id_no=False)[0][0]
            for ref in refs.execute()]

700 

def composition_by_amcsd(self, amcsd_id):
    """Return the 'z' values linked to entry `amcsd_id` in 'compref'."""
    compref = self.compref
    matches = self.query(compref).filter(compref.c.amcsd_id==amcsd_id).all()
    z_values = []
    for entry in matches:
        z_values.append(entry.z)
    return z_values

704 

def cif_by_amcsd(self,amcsd_id,only_ids=False):
    """Look up entry `amcsd_id` in 'ciftbl'.

    Returns:
    --------
    the stored CIF text, or the tuple (mineral_id, iuc_id) when
    `only_ids` is true; None if the entry does not exist.
    """
    lookup = self.ciftbl.select(self.ciftbl.c.amcsd_id == amcsd_id)
    first = next(iter(lookup.execute()), None)
    if first is None:
        return None
    if only_ids:
        return first.mineral_id, first.iuc_id
    return first.cif

713 

def mineral_by_amcsd(self,amcsd_id):
    """Return the mineral name of entry `amcsd_id`.

    Returns:
    --------
    the 'mineral_name' from 'nametbl' for the entry's mineral id; None
    if the entry, or its mineral name, is not in the database.
    """
    mineral_id = None
    found_cif = False
    search_cif = self.ciftbl.select(self.ciftbl.c.amcsd_id == amcsd_id)
    for row in search_cif.execute():
        mineral_id = row.mineral_id
        found_cif = True
    if not found_cif:
        # no such entry: there is no mineral id to look up
        return None

    mineral_name = None
    search_mineralname = self.nametbl.select(self.nametbl.c.mineral_id == mineral_id)
    for row in search_mineralname.execute():
        mineral_name = row.mineral_name
    return mineral_name

726 

727################################################################################## 

728################################################################################## 

729 

def amcsd_by_q(self, peaks, qmin=None, qmax=None, qstep=None, list=None,
               verbose=False):
    """Score database entries against a list of observed peak positions.

    Arguments:
    ----------
    peaks     observed peak positions in q
    qmin      lower limit of the q range (default QMIN)
    qmax      upper limit of the q range (default QMAX)
    qstep     bin width; if larger than the step of the stored axis,
              the stored peak flags are re-binned to it (default QSTEP)
    list      passed on to match_qc()
    verbose   not used

    Returns:
    --------
    list of tuples (score, amcsd id, total peaks, matched peaks, missed
    peaks), sorted in descending order.  Each stored peak adds 1 to the
    score when it falls in a bin holding an observed peak and subtracts
    1 otherwise.
    """
    qmin = QMIN if qmin is None else qmin
    qmax = QMAX if qmax is None else qmax
    qstep = QSTEP if qstep is None else qstep

    ## Defines min/max limits of q-range
    grid = self.axis
    lo, hi = 0, len(grid)
    if qmin > np.min(grid):
        lo = abs(grid-qmin).argmin()
    if qmax < np.max(grid):
        hi = abs(grid-qmax).argmin()
    qaxis = grid[lo:hi]
    stepq = (qaxis[1]-qaxis[0])

    amcsd, q_amcsd = self.match_qc(list=list, qmin=qmin, qmax=qmax)

    ## Re-bins data if different step size is specified
    if qstep > stepq:
        coarse = np.arange(np.min(qaxis),np.max(qaxis)+stepq,qstep)
        rebinned = np.zeros((np.shape(q_amcsd)[0],np.shape(coarse)[0]))
        for m,flags in enumerate(q_amcsd):
            for n,flag in enumerate(flags):
                if flag != 1:
                    continue
                nearest = np.abs(coarse-qaxis[n]).argmin()
                rebinned[m][nearest] = 1
        qaxis, q_amcsd = coarse, rebinned

    ## Create data array: 1 marks a bin holding an observed peak
    npts = len(qaxis)
    observed = np.zeros(npts,dtype=int)
    for p in peaks:
        observed[np.abs(qaxis-p).argmin()] = 1
    unobserved = 1 - observed
    weighting = observed - unobserved

    ## Calculate score/matches/etc.
    total_peaks = np.sum((q_amcsd),axis=1)
    match_peaks = np.sum((observed*q_amcsd),axis=1)
    miss_peaks = np.sum((unobserved*q_amcsd),axis=1)
    scores = np.sum((weighting*q_amcsd),axis=1)

    ranked = sorted(zip(scores, amcsd, total_peaks, match_peaks, miss_peaks))
    ranked.reverse()
    return ranked

776 

777 

def amcsd_by_chemistry(self, include=None, exclude=None, list=None):
    """Find entries by chemical composition.

    Arguments:
    ----------
    include   elements that must all be present (default: none)
    exclude   elements that must all be absent; True excludes every
              element not named in `include` (default: none)
    list      passed on to return_z_matches() (default None)

    Returns:
    --------
    list of amcsd ids containing every `include` element and no
    `exclude` element.
    """
    def z_of(element):
        # NOTE(review): get_element() is not visible here; presumably it
        # returns a row with a `.z` attribute, or None -- confirm.
        return getattr(self.get_element(element), 'z', None)

    z_incld = []
    for element in include or []:
        z = z_of(element)
        if z is not None and z not in z_incld:
            z_incld.append(z)

    z_excld = []
    if isinstance(exclude,bool):
        if exclude:
            for z, _name, _symbol in ELEMENTS:
                z = int(z)
                if z not in z_incld:
                    z_excld.append(z)
    else:
        for element in exclude or []:
            z = z_of(element)
            if z is not None and z not in z_excld:
                z_excld.append(z)

    # flag lists indexed by atomic number (index 0 is unused)
    nz = len(ELEMENTS)+1
    z_list_include = [1 if z in z_incld else 0 for z in range(nz)]
    z_list_exclude = [1 if z in z_excld else 0 for z in range(nz)]

    # `list` is now the caller's value; without a parameter of that name
    # the builtin `list` type was passed on here.
    amcsd,z_amcsd = self.return_z_matches(list=list)
    if len(amcsd) == 0:
        return []

    ## Calculate score/matches/etc.
    z_amcsd = np.array(z_amcsd)
    match_z = np.sum((np.array(z_list_include)*z_amcsd),axis=1)
    miss_z = np.sum((np.array(z_list_exclude)*z_amcsd),axis=1)

    n_required = np.sum(z_list_include)
    return [amcsd_id for i,amcsd_id in enumerate(amcsd)
            if match_z[i] == n_required and miss_z[i] <= 0]

819 

820 

def amcsd_by_mineral(self, min_name, list=None, verbose=True):
    """Search by mineral name.

    Arguments:
    ----------
    min_name   mineral name handed to search_for_mineral()
    list       if not None, only these amcsd ids are considered
    verbose    not used

    Returns:
    --------
    list of amcsd ids, without repeats, in the order found; empty if no
    mineral matches `min_name`.
    """
    minerals = self.search_for_mineral(name=min_name)

    cif_query = self.query(self.ciftbl)
    if list is not None:
        cif_query = cif_query.filter(self.ciftbl.c.amcsd_id.in_(list))

    if len(minerals) == 0:
        return []

    ## Searches mineral name for database entries
    mineral_ids = [mineral.mineral_id for mineral in minerals]
    cif_query = cif_query.filter(self.ciftbl.c.mineral_id.in_(mineral_ids))
    found = []
    for entry in cif_query.all():
        if entry.amcsd_id in found:
            continue
        found.append(entry.amcsd_id)
    return found

840 

def amcsd_by_author(self,include=[''],list=None,verbose=True):
    """
    search by author name

    Arguments:
    ----------
    include   author names (or ids), each passed to search_for_author()
    list      optional collection of amcsd ids to restrict the search to
    verbose   accepted for interface compatibility; not used here

    Returns:
    --------
    list of unique amcsd ids (in query order) having at least one of the
    matched authors; empty if no author matched
    """
    author_ids = []
    for author in include:
        author_ids.extend(self.search_for_author(author))

    ## join the cif, author, and author-reference tables
    joined = self.query(self.ciftbl, self.authtbl, self.authref)
    joined = joined.filter(self.authref.c.amcsd_id == self.ciftbl.c.amcsd_id)
    joined = joined.filter(self.authref.c.author_id == self.authtbl.c.author_id)
    if list is not None:
        joined = joined.filter(self.ciftbl.c.amcsd_id.in_(list))

    matched_ids = []
    if len(author_ids) > 0:
        ## This works in an 'or' fashion: each name given can map to
        ## several author ids, and an entry is kept if any of them is
        ## among its authors.  (see note by mkak, 2017.02.24)
        by_author = joined.filter(self.authref.c.author_id.in_(author_ids))
        for entry in by_author.all():
            if entry.amcsd_id not in matched_ids:
                matched_ids.append(entry.amcsd_id)

    return matched_ids

870 

871 

def match_elements(self, elems, exclude=None):
    """match structues containing all elements in a list

    Arguments:
    ----------
    elems     list of elements to match
    exclude   list of elements to exclude for match (default None)

    Returns:
    --------
    list of amcsd ids for structures

    Notes:
    ------
    An empty `elems` gives an empty list (previously None, or a
    TypeError when `exclude` was also given).
    """
    elems = list(elems)
    if len(elems) == 0:
        return []

    q = self.query(self.compref)

    def ids_containing(elem):
        "amcsd ids of all composition rows for one element"
        z = self.get_element(elem).z
        return [row.amcsd_id for row in q.filter(self.compref.c.z==z).all()]

    matches = None
    for elem in elems:
        sids = ids_containing(elem)
        if matches is None:
            matches = sids
        else:
            # set lookup keeps the intersection linear in the row count
            previous = set(matches)
            matches = [s for s in sids if s in previous]

    if exclude is not None:
        banned = set()
        for elem in exclude:
            banned.update(ids_containing(elem))
        matches = [s for s in matches if s not in banned]
    return matches

904 

def create_z_array(self,z):
    """
    build a 0/1 integer array flagging the atomic numbers given in z

    The array has len(ELEMENTS)+1 entries so that the index equals the
    atomic number; index 0 corresponds to no element.
    """
    size = len(ELEMENTS) + 1
    flags = np.zeros(size, dtype=int)
    for atomic_number in z:
        flags[atomic_number] = 1
    return flags

910 

911 

912################################################################################## 

913################################################################################## 

def match_qc(self, list=None, qmin=QMIN, qmax=QMAX):
    """
    return amcsd ids and their stored q data, restricted to a q range

    Arguments:
    ----------
    list   optional collection of amcsd ids to restrict to (default None: all)
    qmin   lower q limit; only applied if larger than QMIN
    qmax   upper q limit; only applied if smaller than QMAX

    Returns:
    --------
    (ids, qdata): list of amcsd ids and, in the same order, the
    JSON-decoded `qstr` of each entry sliced to the indices of
    self.axis nearest qmin and qmax

    Notes:
    ------
    Ids and q strings are read with one query so each id is paired with
    its own row; two separate queries do not guarantee the same order.
    """
    qry = self.query(self.ciftbl.c.amcsd_id, self.ciftbl.c.qstr)
    if list is not None:
        qry = qry.filter(self.ciftbl.c.amcsd_id.in_(list))
    rows = qry.all()

    imin, imax = 0, len(self.axis)
    if qmax < QMAX:
        imax = abs(self.axis-qmax).argmin()
    if qmin > QMIN:
        imin = abs(self.axis-qmin).argmin()

    ids = [row[0] for row in rows]
    qdata = [json.loads(row[1])[imin:imax] for row in rows]
    return ids, qdata

932 

def create_q_array(self, q):
    """
    build a 0/1 integer array, the length of self.axis, flagging for
    each value in q the point of self.axis nearest to it
    """
    nearest = [int(np.abs(self.axis - qval).argmin()) for qval in q]

    flags = np.zeros(len(self.axis), dtype=int)
    if len(nearest) > 0:
        flags[nearest] = 1
    return flags

940 

941################################################################################## 

942 

def get_element(self, element):
    '''
    look up one element by symbol, name, or atomic number;
    the match must be exact.

    An integer is compared as its decimal string, a string is compared
    after str.title() (so 'fe' and 'IRON' become 'Fe' and 'Iron').

    returns row with attributes .z, .element_name, .element_symbol

    The lookup ends in .one(); presumably this raises when no element,
    or more than one, matches (SQLAlchemy Query.one semantics -- confirm
    against self.query).
    '''
    if isinstance(element, int):
        key = '%d' % element
    elif isinstance(element, str):
        key = element.title()
    else:
        key = element

    elem_query = self.query(self.elemtbl)
    cols = self.elemtbl.c
    any_column = or_(cols.z == key,
                     cols.element_symbol == key,
                     cols.element_name == key)
    return elem_query.filter(any_column).one()

959 

def search_for_author(self,name,exact=False,id_no=True,verbose=False):
    '''
    searches database for author matching criteria given in 'name'
       - if name is a string:
            - will match author name containing text
            - will match id number if integer given in string
            - will only look for exact match if exact flag is given
       - if name is an integer, will only match id number from database
    id_no: if True, will only return the id number of match(es)
           if False, returns name and id number
    verbose: if True, prints a message when nothing matches
    e.g.   as INTEGER
           >>> cif.search_for_author(6,id_no=False)
                ([u'Chao G Y'], [6])
           as STRING
           >>> cif.search_for_author('6',id_no=False)
                ([u'Chao G Y', u'Geology Team 654'], [6, 7770])
    '''
    author_id, pattern = filter_int_and_str(name, exact=exact)

    # run the query once and reuse the rows
    rows = self.query(self.authtbl)\
               .filter(or_(self.authtbl.c.author_name.like(pattern),
                           self.authtbl.c.author_id == author_id))\
               .all()

    if len(rows) == 0 and verbose:
        # report the name as given, not the %-wrapped LIKE pattern
        print('%s not found in author database.' % name)

    authname = [row.author_name for row in rows]
    authid = [row.author_id for row in rows]

    if id_no:
        return authid
    return authname, authid

994 

def search_for_mineral(self, name=None, minid=None, exact=False):
    '''
    look up minerals in the database, by name or by ID

    Arguments:
    ----------
    name (str or None): mineral name to match; takes precedence over minid
    minid (int or None): mineral ID in database to match
    exact (bool): whether to match name exactly [False]; otherwise the
                  name may appear anywhere in the mineral name

    Returns:
    --------
    list of matching rows (empty if neither name nor minid is given)
    '''
    name_query = self.query(self.nametbl)

    if name is not None:
        pattern = name if exact else '%%%s%%' % name
        return name_query.filter(self.nametbl.c.mineral_name.like(pattern)).all()

    if minid is not None:
        return name_query.filter(self.nametbl.c.mineral_id == minid).all()

    return []

1023 

def cif_count(self):
    """return the number of rows in the cif table"""
    all_cifs = self.query(self.ciftbl)
    return all_cifs.count()

1026 

def return_q(self):
    """return the `q` column of every row of the q table as a numpy array of floats"""
    qvalues = []
    for qrow in self.query(self.qtbl).all():
        qvalues.append(float(qrow.q))
    return np.array(qvalues)

1030 

def get_mineral_names(self):
    """return the sorted list of mineral names, skipping non-string entries"""
    name_rows = self.query(self.nametbl.c.mineral_name).all()
    return sorted(row[0] for row in name_rows if isinstance(row[0], str))

1037 

def return_author_names(self):
    """return the sorted list of all author names in the author table"""
    author_rows = self.query(self.authtbl).all()
    return sorted(row.author_name for row in author_rows)

1046 

def filter_int_and_str(s, exact=False):
    """split a search term into an integer id and a SQL LIKE pattern

    Arguments:
    ----------
    s      search term (string, integer, ...)
    exact  if False (default), a string term is wrapped in '%' wildcards

    Returns:
    --------
    (i, s): i is int(s), or 0 if s cannot be converted to an integer;
    s is the (possibly wildcard-wrapped) term; non-string terms are
    returned unchanged
    """
    try:
        i = int(s)
    except (TypeError, ValueError, OverflowError):
        # not a number: 0 is used as the "no id" value
        i = 0
    if not exact and isinstance(s, str):
        s = '%' + s + '%'
    return i, s

1058 

1059 

def column(matrix, i):
    """return entry `i` of every row of `matrix`, as a list"""
    picked = []
    for entries in matrix:
        picked.append(entries[i])
    return picked

1062 

class RangeParameter(object):
    """
    simple container for a search range: lower bound, upper bound, unit
    (each None unless given)
    """
    def __init__(self,min=None,max=None,unit=None):
        self.min, self.max, self.unit = min, max, unit

1068 

class SearchCIFdb(object):
    '''
    interface to the search the cif database

    Holds the current search criteria (authors, mineral name, keywords,
    categories, amcsd ids, q peaks, included/excluded elements, space
    group and lattice-parameter ranges) and converts them to and from
    the text form shown to the user.
    '''
    def __init__(self, verbose=False):

        self.verbose = verbose

        ## running list of included amcsd id numbers
        self.amcsd_id = []

        ## tags for searching
        self.authors = []
        self.keywords = []
        self.categories = []
        self.amcsd = []
        self.qpks = []

        self.mnrlname = ''

        self.elem_incl = []
        self.elem_excl = []
        ## element symbols (third entry of each ELEMENTS row)
        self.allelem = column(ELEMENTS, 2)

        self.lattice_keys = ['a', 'b', 'c', 'alpha', 'beta', 'gamma']

        self.sg = None
        self.a = RangeParameter()
        self.b = RangeParameter()
        self.c = RangeParameter()
        self.alpha = RangeParameter()
        self.beta = RangeParameter()
        self.gamma = RangeParameter()

    def show_all(self):
        '''print every search criterion, one per line'''
        for key in ['authors','mnrlname','keywords','categories','amcsd','qpks']:
            print('%s : %s' % (key,self.show_parameter(key=key)))
        print('chemistry : %s' % self.show_chemistry())
        print('geometry : %s' % self.show_geometry())

    def show_parameter(self, key='authors'):
        '''
        return the entries stored under `key` as a comma-separated string,
        using the first whitespace-delimited word of each entry

        A plain string value (e.g. the initial `mnrlname`) is treated as
        a single entry rather than as a sequence of characters.
        '''
        items = self.__dict__[key]
        if isinstance(items, str):
            items = [items] if items.strip() else []
        return ', '.join(item.split()[0] for item in items)

    def read_parameter(self,s,clear=True,key='authors'):
        '''
        store the comma-separated entries of `s` under `key`, keeping the
        first whitespace-delimited word of each entry; blank entries are
        skipped.

        This function works for keys:
            'authors'
            'mnrlname
            keywords','categories','amcsd','qpks'

        clear: if True, previous entries for `key` are discarded first
        '''
        if clear:
            self.__dict__[key] = []

        target = self.__dict__.get(key)
        if not isinstance(target, list):
            # e.g. key='mnrlname' still holding a plain string with
            # clear=False: there is no list to extend, so the input is
            # ignored (as before, but without a blanket `except`)
            return

        for entry in s.split(','):
            words = entry.split()
            if words:
                target.append(words[0])

    def read_chemistry(self,s,clear=True):
        '''
        parse a chemistry string into self.elem_incl and self.elem_excl

        Parentheses and spaces are ignored.  Text before the last '-' is
        a comma-separated list of elements to include, text after it the
        elements to exclude; a trailing '-' excludes every element not
        included.  Without a '-' (or with it as first character) the
        whole string is read as the include list.  Only symbols found in
        self.allelem are kept; an included element is dropped from the
        exclude list.

        clear: if True, previous include/exclude lists are discarded first
        '''
        # NOTE(review): `re` is not among the imports visible at the top
        # of this file, so it is imported here to keep this method usable
        import re

        if clear:
            self.elem_incl,self.elem_excl = [],[]
        chem_incl,chem_excl = [],[]

        chemstr = re.sub('[( )]','',s)
        ii = chemstr.rfind('-')   ## position of last '-', -1 if none
        if ii > 0:
            chem_incl = chemstr[0:ii].split(',')
            if len(chemstr)-ii == 1:
                ## trailing '-': exclude everything that is not included
                for elem in self.allelem:
                    if elem not in chem_incl:
                        chem_excl += [elem]
            elif ii < len(chemstr)-1:
                chem_excl = chemstr[ii+1:].split(',')
        else:
            chem_incl = chemstr.split(',')

        for elem in chem_incl:
            elem = capitalize_string(elem)
            if elem in self.allelem and elem not in self.elem_incl:
                self.elem_incl += [elem]
                if elem in self.elem_excl:
                    j = self.elem_excl.index(elem)
                    self.elem_excl.pop(j)
        for elem in chem_excl:
            elem = capitalize_string(elem)
            if elem in self.allelem and elem not in self.elem_excl and elem not in self.elem_incl:
                self.elem_excl += [elem]

    def show_chemistry(self):
        '''
        return the include/exclude element lists as text, e.g.
        '(Fe,O) - (Si)'; the excluded elements are not listed when they
        amount to (nearly) all remaining elements
        '''
        s = ''
        if len(self.elem_incl) > 0:
            s = '(%s) ' % ','.join(self.elem_incl)
        if len(self.elem_excl) > 0:
            s = '%s- ' % s
            # if all else excluded, don't list
            if (len(self.allelem)-20) > (len(self.elem_incl)+len(self.elem_excl)):
                s = '%s(%s)' % (s, ','.join(self.elem_excl))
        return s

    def show_geometry(self,unit='A'):
        '''
        return space group and lattice-parameter ranges as text, e.g.
        'sg=Pnma,a=5.00to6.00A'; parameters without a minimum are omitted.
        The `unit` argument is not used: each parameter shows its own unit.
        '''
        s = ''

        if self.sg is not None:
            s = '%ssg=%s,' % (s,self.sg)
        for key in self.lattice_keys:
            par = self.__dict__[key]
            if par.min is not None:
                s = '%s%s=%0.2f' % (s,key,float(par.min))
                if par.max is not None:
                    s = '%sto%0.2f' % (s,float(par.max))
                s = '%s%s,' % (s,par.unit)

        ## drop the trailing comma
        if len(s) > 1 and s[-1] == ',':
            s = s[:-1]

        return s

    def read_geometry(self,s):
        '''
        parse text such as 'sg=Pnma,a=5.00to6.00A' (the form produced by
        show_geometry) into self.sg and the lattice-parameter ranges

        Each comma-separated item is key=value.  For a lattice key the
        value is split into runs of letters and non-letters: the first
        run is the minimum, the last run the unit (if there is more than
        one run) and the third run the maximum (if there are more than
        two).  Items without '=', with an empty value, or with an unknown
        key are ignored.  Every lattice key, and the space group, not set
        by `s` is reset.
        '''
        used = []
        for par in s.split(','):
            parts = par.split('=')
            if len(parts) < 2:
                continue
            key, val = parts[0].strip(), parts[1].strip()
            if key == 'sg':
                self.sg = val
                used += [key]
            elif key in self.lattice_keys:
                values = [''.join(g) for _, g in groupby(val, str.isalpha)]
                if len(values) == 0:
                    continue
                rng = self.__dict__[key]
                rng.min = values[0]
                if len(values) > 1:
                    rng.unit = values[-1]
                rng.max = values[2] if len(values) > 2 else None
                used += [key]

        ## Resets undefined to None
        for key in self.lattice_keys:
            if key not in used:
                self.__dict__[key] = RangeParameter()
        if 'sg' not in used:
            self.sg = None

1244 

def match_database(cifdb, peaks, minq=QMIN, maxq=QMAX, verbose=True):
    """
    match peak positions against the structures in a cif database

    Arguments:
    ----------
    cifdb    database object providing amcsd_by_q() and mineral_by_amcsd()
    peaks    peak positions in q, passed to cifdb.amcsd_by_q()
    minq     lower q limit passed to amcsd_by_q() (default QMIN)
    maxq     upper q limit passed to amcsd_by_q() (default QMAX)
    verbose  if True, print a summary of (at most) the 100 best matches

    Returns:
    --------
    list of amcsd ids with a positive score, in the order given by
    amcsd_by_q(); empty if there is no match
    """
    stepq = 0.05
    ## each row: (score, amcsd id, total peaks, matched peaks, missed peaks)
    rows = cifdb.amcsd_by_q(peaks, qmin=minq, qmax=maxq, qstep=stepq,
                            list=None, verbose=False)

    hits = [row for row in rows if row[0] > 0]
    MATCHES = [row[1] for row in hits]

    if verbose:
        print('\n')
        if len(MATCHES) > 100:
            print('DISPLAYING TOP 100 of %i TOTAL MATCHES FOUND.' % len(MATCHES))
        else:
            print('%i TOTAL MATCHES FOUND.' % len(MATCHES))
        for score, id_no, total, matched, _missed in hits[:100]:
            print('AMCSD %5d, %s (score of %2d --> %i of %i peaks)' % (id_no,
                  cifdb.mineral_by_amcsd(id_no), score, matched, total))
        print('')

    return MATCHES

1275 

1276 

def cif_match(peaks, qmin=None, qmax=None, verbose=False, _larch=None):
    """
    match peak positions against the structures in the cif database

    Arguments:
    ----------
    peaks    peak positions in q, passed to amcsd_by_q()
    qmin     lower q limit, passed unchanged to amcsd_by_q() (default None)
    qmax     upper q limit, passed unchanged to amcsd_by_q() (default None)
    verbose  if True, print a summary of (at most) the 100 best matches
    _larch   larch session, passed to get_cifdb()

    Returns:
    --------
    list of amcsd ids with a positive score, in the order given by
    amcsd_by_q(); empty if there is no match.  The returned list does
    not depend on `verbose`.
    """
    cifdb = get_cifdb(_larch=_larch)
    qstep = 0.05

    ## each row: (score, amcsd id, total peaks, matched peaks, missed peaks)
    # NOTE(review): qmin/qmax may be None here; confirm amcsd_by_q() accepts that
    rows = cifdb.amcsd_by_q(peaks, qmin=qmin, qmax=qmax, qstep=qstep)

    hits = [row for row in rows if row[0] > 0]
    matches = [row[1] for row in hits]

    if verbose:
        print('\n')
        if len(matches) > 100:
            print('DISPLAYING TOP 100 of %i TOTAL MATCHES FOUND.' % len(matches))
        else:
            print('%i TOTAL MATCHES FOUND.' % len(matches))
        for score, id_no, total, matched, _missed in hits[:100]:
            print('AMCSD %5d, %s (score of %2d --> %i of %i peaks)' % (id_no,
                  cifdb.mineral_by_amcsd(id_no), score, matched, total))
        print('')

    return matches

1312 

1313 

def read_cif(filename=None, amcsd_id=None, _larch=None):
    """build a representation of a CIF data structure
    for crystallographic computations

    Arguments:
    ----------
    filename  (str or None) name of CIF file
    amcsd_id  (int or None) index of CIF in the American Mineralogist
              Crystal Structure Database
    _larch    larch session, passed to get_cifdb()

    Returns
    -------
    CIF representation, as built by create_xrdcif()
    """
    database = get_cifdb(_larch=_larch)
    xrdcif = create_xrdcif(filename=filename, cifdb=database,
                           amcsd_id=amcsd_id)
    return xrdcif