Coverage for /Users/Newville/Codes/xraylarch/larch/xrd/amcsd_utils.py: 29%

84 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1import os 

2import sqlite3 

3from base64 import b64encode, b64decode 

4 

5import numpy as np 

6 

7from sqlalchemy import MetaData, create_engine, func, text, and_ 

8from sqlalchemy.sql import select 

9from sqlalchemy.orm import sessionmaker 

10from sqlalchemy.pool import SingletonThreadPool 

11 

12try: 

13 from pymatgen.io.cif import CifParser 

14 from pymatgen.symmetry.analyzer import SpacegroupAnalyzer 

15 from pymatgen.core import Molecule, IMolecule, IStructure 

16except: 

17 CifParser = SpacegroupAnalyzer = None 

18 Molecule = IMolecule = IStructure = None 

19 

20from larch.utils.physical_constants import ATOM_SYMS, ATOM_NAMES 

21 

# version tag of this module
__version__ = '1'

# keyword options kept together under one name; presumably passed to
# pymatgen's CifParser -- TODO confirm against the callers
PMG_CIF_OPTS = {'occupancy_tolerance': 10, 'site_tolerance': 5e-3}

25 

26 

def make_engine(dbname):
    """Return a SQLAlchemy engine for the sqlite file `dbname`.

    The engine is created with the SingletonThreadPool pool class and
    passes check_same_thread=False to the sqlite connection.
    """
    url = 'sqlite:///%s' % (dbname)
    connect_args = {'check_same_thread': False}
    return create_engine(url, poolclass=SingletonThreadPool,
                         connect_args=connect_args)

32 

def isAMCSD(dbname):
    """whether a file is a valid AMCSD database

    Args:
        dbname (string): name of AMCSD database file

    Returns:
        bool: is file a valid AMCSD database

    Notes:
      1. must be a sqlite db file, with tables
         'cif', 'elements', 'spacegroups'
      2. any error while opening or reflecting the database is treated
         as "not a valid AMCSD database" and gives False.
    """
    required_tables = ('cif', 'elements', 'spacegroups')
    try:
        engine = make_engine(dbname)
        meta = MetaData()
        meta.reflect(bind=engine)
    except Exception:
        # best-effort check: an unreadable or non-sqlite file is simply
        # "not valid".  Exception (rather than a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        return False
    return all(name in meta.tables for name in required_tables)

56 

57 

# scale factor applied before values are rounded to int32 in encode_farray()
# and divided out again in decode_farray()
farray_scale = 4.e6


def encode_farray(dat):
    """Encode a list of fractional coordinates given as strings.

    Values are expected to be strictly on (-1, 1).  Each one is multiplied
    by `farray_scale`, rounded, stored as int32 and base64-encoded to a
    string for saving to the db, to be decoded by decode_farray().
    Preserves precision to slightly better than 6 digits.

    Args:
        dat (iterable of str): coordinate strings.  A parenthesised part
            such as the '(3)' in '0.2500(3)' is dropped.  '?' and '.' are
            stored as the out-of-bounds sentinel values 2 and 3.  Strings
            that cannot be converted to float are stored as 0.

    Returns:
        str: ASCII base64 text of the int32 array.
    """
    work = []
    for d in dat:
        if d == '?':
            work.append(2.0)  # out-of-bounds as '?'
        elif d == '.':
            work.append(3.0)  # out-of-bounds as '.'
        else:
            # strip a parenthesised suffix; check both parentheses so a
            # value with only one of them is still cleaned up
            if '(' in d or ')' in d:
                d = d.replace(')', ' : ').replace('(', ' : ')
                d = d.split(':')[0].strip()
            try:
                fval = float(d)
            except ValueError:
                fval = 0.0
            work.append(fval)
    x = (farray_scale*np.array(work, dtype=float)).round()
    return b64encode(x.astype(np.int32).tobytes()).decode('ascii')

82 

def decode_farray(dat):
    """Decode a string encoded by encode_farray().

    The base64 text is read back as int32 values and divided by
    `farray_scale`.  Values within 1.e-5 of the sentinels 2 and 3 are
    turned back into '?' and '.'; all other values are formatted with
    the 'f' format.

    Args:
        dat (str or bytes): base64 text produced by encode_farray()

    Returns:
        list of str: decoded values
    """
    # np.frombuffer is the supported way to read raw binary data; the
    # binary mode of np.fromstring is deprecated in NumPy.
    arr = np.frombuffer(b64decode(dat), dtype=np.int32)/farray_scale
    out = []
    for a in arr:
        if abs(a-2.0) < 1.e-5:
            out.append('?')
        elif abs(a-3.0) < 1.e-5:
            out.append('.')
        else:
            out.append(f"{a:f}")
    return out

97 

def put_optarray(dat, attr):
    """Return `dat[attr]` encoded with encode_farray(), or '0' if absent.

    A missing key, or a stored value equal to '0', is returned as '0'
    without encoding.
    """
    value = dat.get(attr, '0')
    return encode_farray(value) if value != '0' else value

103 

def get_optarray(dat):
    """Decode `dat` with decode_farray(), unless it is 0 or '0'.

    The values 0 and '0' are returned unchanged.
    """
    if dat in (0, '0'):
        return dat
    return decode_farray(dat)

108 

109 

# SQL statements that create the tables of an AMCSD sqlite database.
# create_amcsd() executes them one at a time, in the order listed here.
schema = (
    # single-row bookkeeping table: id, tag, date, notes
    '''CREATE TABLE version (id integer primary key, tag text, date text, notes text);''',
    # chemical elements; column order is (id, z, name, symbol).
    # NOTE(review): create_amcsd() inserts positionally with
    # (i, atz, sym, name) -- confirm that order matches these columns.
    '''CREATE TABLE elements (
        id integer not null,
        z INTEGER NOT NULL,
        name VARCHAR(40),
        symbol VARCHAR(2) NOT NULL primary key);''',

    # space groups, one row per unique hm_notation
    '''CREATE TABLE spacegroups (
        id INTEGER primary key,
        hm_notation VARCHAR(16) not null unique,
        symmetry_xyz text NOT NULL,
        category text );''',

    # mineral names (unique)
    '''CREATE TABLE minerals (
        id INTEGER not null primary key,
        name text not null unique);''',

    # author names (unique)
    '''CREATE TABLE authors (
        id INTEGER NOT NULL primary key,
        name text unique);''',
    # publications: journal, volume, year and page range
    '''CREATE TABLE publications (
        id INTEGER NOT NULL primary key,
        journalname text not null,
        volume text,
        year integer not null,
        page_first text,
        page_last text);''',

    # link table between publications and authors
    '''CREATE TABLE publication_authors (
        publication_id INTEGER not null,
        author_id integer not null,
        FOREIGN KEY(publication_id) REFERENCES publications (id),
        FOREIGN KEY(author_id) REFERENCES authors (id));''',

    # one row per CIF entry; references spacegroups, minerals and
    # publications.  All cell and atoms_* columns are declared as text.
    '''CREATE TABLE cif (
        id integer not null primary key,
        mineral_id INTEGER,
        spacegroup_id INTEGER,
        publication_id INTEGER,
        formula text,
        compound text,
        pub_title text,
        formula_title text,
        a text,
        b text,
        c text,
        alpha text,
        beta text,
        gamma text,
        cell_volume text,
        crystal_density text,
        atoms_sites text,
        atoms_x text,
        atoms_y text,
        atoms_z text,
        atoms_occupancy text,
        atoms_u_iso text,
        atoms_aniso_label text,
        atoms_aniso_u11 text,
        atoms_aniso_u22 text,
        atoms_aniso_u33 text,
        atoms_aniso_u12 text,
        atoms_aniso_u13 text,
        atoms_aniso_u23 text,
        qdat text,
        amcsd_url text,
        FOREIGN KEY(spacegroup_id) REFERENCES spacegroups (id),
        FOREIGN KEY(mineral_id) REFERENCES minerals (id),
        FOREIGN KEY(publication_id) REFERENCES publications (id));''',

    # element symbols paired with a cif id (no foreign keys declared)
    '''CREATE TABLE cif_elements (
        cif_id text not null,
        element VARCHAR(2) not null);''',
    )

185 

186 

187def create_amcsd(dbname='test.db'): 

188 if os.path.exists(dbname): 

189 os.unlink(dbname) 

190 

191 conn = sqlite3.connect(dbname) 

192 cursor = conn.cursor() 

193 for s in schema: 

194 cursor.execute(s) 

195 

196 cursor.execute('insert into version values (?,?,?,?)', 

197 ('0', 'in progress', 'today', 'in progress')) 

198 

199 atz, i = 0, 0 

200 for sym, name in zip(ATOM_SYMS, ATOM_NAMES): 

201 i += 1 

202 atz += 1 

203 if sym == 'D': 

204 atz = 1 

205 cursor.execute('insert into elements values (?,?,?,?)', (i, atz, sym, name))