Coverage for /Users/Newville/Codes/xraylarch/larch/math/lincombo_fitting.py: 11%

158 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-11-09 10:08 -0600

1#!/usr/bin/env python 

2""" 

3linear combination fitting 

4""" 

5import os 

6import sys 

7import time 

8import json 

9import copy 

10 

11from itertools import combinations 

12from glob import glob 

13 

14import numpy as np 

15from numpy.random import randint 

16import lmfit 

17from .. import Group 

18from .utils import interp, index_of, etok 

19 

20 

def get_arrays(group, arrayname, xname='energy'):
    """look up the x and y arrays of a group by attribute name

    Arguments
    ---------
      group      object holding the arrays as attributes
      arrayname  name of the y array; the special name 'chik' selects
                 the attributes `k` (as x) and `chi` (as y)
      xname      name of the x array, ignored for 'chik' ['energy']

    Returns
    -------
      x, y   where either is None if the attribute does not exist
    """
    if arrayname == 'chik':
        xattr, yattr = 'k', 'chi'
    else:
        xattr, yattr = xname, arrayname
    return getattr(group, xattr, None), getattr(group, yattr, None)

30 

def get_label(group):
    """return a label for a group

    The attributes 'filename', 'label', 'groupname' and '__name__' are
    tried in that order and the first one that is not None is returned.
    If none of them is set, the hex string of `id(group)` is returned.
    """
    for name in ('filename', 'label', 'groupname', '__name__'):
        found = getattr(group, name, None)
        if found is not None:
            return found
    return hex(id(group))

41 

42 

def groups2matrix(groups, yname='norm', xname='energy', xmin=-np.inf, xmax=np.inf,
                  interp_kind='cubic'):
    """extract an array from a list of groups and construct a uniform 2d matrix
    ready for linear analysis

    Arguments
    ---------
      groups       list of groups, assumed to have similar naming conventions
      yname        name of y-arrays to convert to matrix ['norm']
      xname        name of x-array to use ['energy']
      xmin         min x value [-inf]
      xmax         max x value [+inf]
      interp_kind  kind argument for interpolation ['cubic']

    Returns
    -------
      xdat, ydat  where xdat has shape (nx,) and ydat has shape (ngroups, nx)

    Raises
    ------
      ValueError  if the requested arrays are missing from any group, or if
                  k-space data is requested and the first group has no
                  non-negative `e0`.

    Notes
    -----
      1. The x grid is that of the first group, restricted to [xmin, xmax];
         the arrays of all other groups are interpolated onto that grid.
      2. A `yname` starting with 'chi' selects the 'k' and 'chi' arrays.  Any
         characters after 'chi' are parsed as an integer k-weight, so that
         'chi2' gives chi * k**2.
      3. When the x array is 'k', `xmin` and `xmax` are compared to `e0` of
         the first group and converted to k with `etok`.
    """
    kweight = 0
    is_chi = yname.startswith('chi')
    if is_chi:
        xname = 'k'
        if len(yname) > 3:
            kweight = int(yname[3:])
        yname = 'chi'

    # e0 is needed to convert xmin/xmax whenever the x array is k, not only
    # when yname was given as 'chi*'.
    e0 = None
    if xname == 'k':
        e0 = getattr(groups[0], 'e0', -1.)
        needs_e0 = is_chi or xmin is not None or xmax is not None
        if needs_e0 and e0 < 0:
            raise ValueError("cannot get chi data")

    # get arrays from first group: these define the output x grid
    xdat, ydat = get_arrays(groups[0], yname, xname=xname)
    if xdat is None or ydat is None:
        raise ValueError("cannot get arrays for arrayname='%s'" % yname)

    imin, imax = None, None
    if xmin is not None:
        if xname == 'k':
            if xmin > e0:
                xmin = etok(xmin-e0)
            else:
                xmin = 0.0
        imin = index_of(xdat, xmin)
    if xmax is not None:
        if xname == 'k':
            if xmax > e0:
                xmax = etok(xmax-e0)
            else:
                xmax = max(xdat)
        imax = index_of(xdat, xmax) + 1

    xsel = slice(imin, imax)
    xdat = xdat[xsel]
    ydat = ydat[xsel]
    if xname == 'k' and kweight > 0:
        ydat = ydat * xdat**kweight

    rows = [ydat]
    for grp in groups[1:]:
        x, y = get_arrays(grp, yname, xname=xname)
        if x is None or y is None:
            raise ValueError("cannot get arrays for arrayname='%s'" % yname)
        if xname == 'k' and kweight > 0:
            y = y * x**kweight
        rows.append(interp(x, y, xdat, kind=interp_kind))
    return xdat, np.array(rows)

105 

106 

def lincombo_fit(group, components, weights=None, minvals=None,
                 maxvals=None, arrayname='norm', xmin=-np.inf, xmax=np.inf,
                 sum_to_one=True, vary_e0=False, max_ncomps=None):

    """perform linear combination fitting for a group

    Arguments
    ---------
      group       Group to be fitted
      components  List of groups to use as components (see Note 1)
      weights     array of starting weights (see Note 2)
      minvals     array of min weights (or None to mean -inf)
      maxvals     array of max weights (or None to mean +inf)
      arrayname   string of array name to be fit (see Note 3) ['norm']
      xmin        x-value for start of fit range [-inf]
      xmax        x-value for end of fit range [+inf]
      sum_to_one  bool, whether to force weights to sum to 1.0 [True]
      vary_e0     bool, whether to vary e0 for data in fit [False]
      max_ncomps  not used by this function [None]

    Returns
    -------
      group with resulting weights and fit statistics

    Notes
    -----
     1.  The names of Group members for the components must match those of the
         group to be fitted.
     2.  use `None` to use basic linear algebra solution.  Individual `None`
         entries are replaced by the corresponding linear algebra value.
     3.  arrayname is expected to be one of `norm`, `mu`, `dmude`, or `chi`.
         It can be some other name but such named arrays should exist for all
         components and groups.
    """

    def _unset(vals):
        # `vals in (None, [None]*n)` cannot be used here: it raises for
        # numpy arrays, whose `==` comparison is element-wise.
        return vals is None or all(v is None for v in vals)

    # first, gather components
    ncomps = len(components)
    allgroups = [group]
    allgroups.extend(components)
    xdat, yall = groups2matrix(allgroups, yname=arrayname,
                               xname='energy', xmin=xmin, xmax=xmax)

    ydat = yall[0, :]
    ycomps = yall[1:, :].transpose()

    # second use unconstrained linear algebra to estimate weights
    ls_out = np.linalg.lstsq(ycomps, ydat, rcond=-1)
    ls_vals = ls_out[0]

    # third use lmfit, imposing bounds and sum_to_one constraint
    if _unset(weights):
        weights = ls_vals
    else:
        weights = [ls_vals[i] if wt is None else wt
                   for i, wt in enumerate(weights)]
    if _unset(minvals):
        minvals = -np.inf * np.ones(ncomps)
    if _unset(maxvals):
        maxvals = np.inf * np.ones(ncomps)

    def lincombo_resid(params, xdata, ydata, ycomps):
        ncols = ycomps.shape[1]
        if params['e0_shift'].vary:
            y = interp(xdata, ydata, xdata+params['e0_shift'].value, kind='cubic')
        else:
            y = ydata
        coefs = np.array([params['c%i' % i].value for i in range(ncols)])
        return np.dot(ycomps, coefs) - y

    params = lmfit.Parameters()
    e0_val = 0.01 if vary_e0 else 0.
    params.add('e0_shift', value=e0_val, vary=vary_e0)
    for i in range(ncomps):
        params.add('c%i' % i, value=weights[i], min=minvals[i], max=maxvals[i])

    if sum_to_one:
        # the last weight is not varied: it is 1 minus all the others
        expr = ['1'] + ['c%i' % i for i in range(ncomps-1)]
        params['c%i' % (ncomps-1)].expr = '-'.join(expr)

    expr = ['c%i' % i for i in range(ncomps)]
    params.add('total', expr='+'.join(expr))

    result = lmfit.minimize(lincombo_resid, params, args=(xdat, ydat, ycomps))

    # gather results, keyed by the label of each component
    weights, weights_lstsq = {}, {}
    params, fcomps = {}, {}
    params['e0_shift'] = copy.deepcopy(result.params['e0_shift'])
    for i in range(ncomps):
        label = get_label(components[i])
        fitted = result.params['c%i' % i]
        weights[label] = fitted.value
        params[label] = copy.deepcopy(fitted)
        weights_lstsq[label] = ls_vals[i]
        fcomps[label] = ycomps[:, i] * fitted.value

    if 'total' in result.params:
        params['total'] = copy.deepcopy(result.params['total'])

    best = np.array([result.params['c%i' % i].value
                     for i in range(ycomps.shape[1])])
    yfit = np.dot(ycomps, best)
    if params['e0_shift'].vary:
        # the residual shifted the data, so shift the fit back onto xdat
        yfit = interp(xdat+params['e0_shift'].value, yfit, xdat, kind='cubic')
    rfactor = ((ydat-yfit)**2).sum() / (ydat**2).sum()
    return Group(result=result, chisqr=result.chisqr, redchi=result.redchi,
                 params=params, weights=weights, weights_lstsq=weights_lstsq,
                 xdata=xdat, ydata=ydat, yfit=yfit, ycomps=fcomps,
                 arrayname=arrayname, rfactor=rfactor,
                 xmin=xmin, xmax=xmax)

214 

def lincombo_fitall(group, components, weights=None, minvals=None, maxvals=None,
                    arrayname='norm', xmin=-np.inf, xmax=np.inf,
                    max_ncomps=None, sum_to_one=True, vary_e0=False,
                    min_weight=0.0005, max_output=16):
    """perform linear combination fittings for a group with all combinations
    of 2 or more of the components given

    Arguments
    ---------
      group       Group to be fitted
      components  List of groups to use as components (see Note 1)
      weights     accepted for compatibility, but not used (see Note 3)
      minvals     array of min weights (or None to mean -inf)
      maxvals     array of max weights (or None to mean +inf)
      arrayname   string of array name to be fit (see Note 2)
      xmin        x-value for start of fit range [-inf]
      xmax        x-value for end of fit range [+inf]
      sum_to_one  bool, whether to force weights to sum to 1.0 [True]
      max_ncomps  int or None: max number of components to use [None -> all]
      vary_e0     bool, whether to vary e0 for data in fit [False]
      min_weight  float, minimum weight for each component to save result [0.0005]
      max_output  int, max number of outputs, sorted by reduced chi-square [16]

    Returns
    -------
      list of groups with resulting weights and fit statistics, ordered by
         reduced chi-square (best first)

    Notes
    -----
     1.  The names of Group members for the components must match those of the
         group to be fitted.
     2.  arrayname can be one of `norm` or `dmude`
     3.  each fit with `nx` components starts from equal weights of 1/nx.
     4.  a fit is dropped when the set of components with weight above
         `min_weight` is the same as that of a fit already kept.
    """
    ncomps = len(components)

    # `vals in (None, [None]*n)` cannot be used here: it raises for numpy
    # arrays, whose `==` comparison is element-wise.
    if minvals is None or all(val is None for val in minvals):
        minvals = -np.inf * np.ones(ncomps)
    if maxvals is None or all(val is None for val in maxvals):
        maxvals = np.inf * np.ones(ncomps)

    # save the bounds for each component by name so that they can be
    # imposed for the individual fits
    bounds = {}
    for i in range(ncomps):
        bounds[get_label(components[i])] = (minvals[i], maxvals[i])

    if max_ncomps is None:
        max_ncomps = ncomps
    elif max_ncomps > 0:
        max_ncomps = int(min(max_ncomps, ncomps))

    out = []
    seen = set()
    for nx in range(2, int(max_ncomps)+1):
        for comps in combinations(components, nx):
            labs = [get_label(c) for c in comps]
            _wts = [1.0/nx] * nx
            _min = [bounds[lab][0] for lab in labs]
            _max = [bounds[lab][1] for lab in labs]

            ret = lincombo_fit(group, comps, weights=_wts,
                               arrayname=arrayname, minvals=_min,
                               maxvals=_max, xmin=xmin, xmax=xmax,
                               sum_to_one=sum_to_one, vary_e0=vary_e0)

            # smaller combinations are fitted first, so a larger fit that
            # only adds components with negligible weight is skipped
            sig_comps = tuple(sorted(key for key, wt in ret.weights.items()
                                     if wt > min_weight))
            if sig_comps not in seen:
                seen.add(sig_comps)
                out.append(ret)

    # sort outputs by reduced chi-square
    return sorted(out, key=lambda fit: fit.redchi)[:max_output]

297 return sorted(out, key=lambda x: x.redchi)[:max_output]