Coverage for /Users/Newville/Codes/xraylarch/larch/math/lincombo_fitting.py: 11%
158 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
« prev ^ index » next coverage.py v7.3.2, created at 2023-11-09 10:08 -0600
1#!/usr/bin/env python
2"""
3linear combination fitting
4"""
5import os
6import sys
7import time
8import json
9import copy
11from itertools import combinations
12from glob import glob
14import numpy as np
15from numpy.random import randint
16import lmfit
17from .. import Group
18from .utils import interp, index_of, etok
def get_arrays(group, arrayname, xname='energy'):
    """look up the x and y arrays for `arrayname` on a group

    Arguments
    ---------
      group      object holding the arrays as attributes
      arrayname  name of the y-array attribute; the special name 'chik'
                 selects the `k` and `chi` attributes instead
      xname      name of the x-array attribute ['energy']

    Returns
    -------
      x, y   the two attributes; either is None when the attribute is missing
    """
    if arrayname == 'chik':
        xattr, yattr = 'k', 'chi'
    else:
        xattr, yattr = xname, arrayname
    return getattr(group, xattr, None), getattr(group, yattr, None)
def get_label(group):
    """return a label for a group

    The attributes 'filename', 'label', 'groupname' and '__name__' are tried
    in that order and the first one that is not None is returned.  If none
    of them is set, the hex form of `id(group)` is returned instead.
    """
    candidates = (getattr(group, name, None)
                  for name in ('filename', 'label', 'groupname', '__name__'))
    # the generator is lazy, so the search stops at the first usable attribute
    found = next((val for val in candidates if val is not None), None)
    return hex(id(group)) if found is None else found
def groups2matrix(groups, yname='norm', xname='energy', xmin=-np.inf, xmax=np.inf,
                  interp_kind='cubic'):
    """extract an array from a list of groups and construct a uniform 2d matrix
    ready for linear analysis

    Arguments
    ---------
      groups       list of groups, assumed to have similar naming conventions
      yname        name of y-arrays to convert to matrix ['norm']
      xname        name of x-array to use ['energy']
      xmin         min x value, or None for no lower limit [-inf]
      xmax         max x value, or None for no upper limit [+inf]
      interp_kind  kind argument for interpolation ['cubic']

    Returns
    -------
      xdat, ydat  where xdat has shape (nx,) and ydat has shape (ngroups, nx)

    Raises
    ------
      ValueError  if any group lacks the requested arrays, or if a valid
                  (non-negative) `e0` is needed on the first group but missing

    Notes
    -----
     1. A yname starting with 'chi' switches the x-array to 'k'; any trailing
        characters are read as an integer k-weight, so 'chi2' gives chi*k**2.
     2. When the x-array is 'k', each bound is compared with `e0` of the first
        group: a bound above e0 becomes etok(bound - e0), a lower bound at or
        below e0 becomes 0, and an upper bound at or below e0 becomes the
        largest x value.
     3. The x-array of the first group (restricted to the selected range) is
        the common grid; all other groups are interpolated onto it.
    """
    kweight = 0
    e0 = None
    if yname.startswith('chi'):
        xname = 'k'
        if len(yname) > 3:
            kweight = int(yname[3:])
        yname = 'chi'
        e0 = getattr(groups[0], 'e0', -1.)
        if e0 < 0:
            raise ValueError("cannot get chi data")

    # arrays from the first group define the x grid
    xdat, ydat = get_arrays(groups[0], yname, xname=xname)
    if xdat is None or ydat is None:
        raise ValueError("cannot get arrays for arrayname='%s'" % yname)

    use_k = (xname == 'k')
    if use_k and e0 is None and not (xmin is None and xmax is None):
        # xname='k' was requested directly with a non-chi yname: the bound
        # conversion below still needs e0, which was previously left unbound
        e0 = getattr(groups[0], 'e0', -1.)
        if e0 < 0:
            raise ValueError("cannot convert x range to k: no valid e0 "
                             "in first group")

    imin, imax = None, None
    if xmin is not None:
        if use_k:
            xmin = etok(xmin-e0) if xmin > e0 else 0.0
        imin = index_of(xdat, xmin)
    if xmax is not None:
        if use_k:
            xmax = etok(xmax-e0) if xmax > e0 else max(xdat)
        imax = index_of(xdat, xmax) + 1   # +1 so the slice includes xmax

    xsel = slice(imin, imax)
    xdat = xdat[xsel]
    ydat = ydat[xsel]
    if use_k and kweight > 0:
        ydat = ydat * xdat**kweight

    ydat = [ydat]
    for igroup, g in enumerate(groups[1:], start=1):
        x, y = get_arrays(g, yname, xname=xname)
        if x is None or y is None:
            raise ValueError("cannot get arrays for arrayname='%s' "
                             "in group %d" % (yname, igroup))
        if use_k and kweight > 0:
            y = y * x**kweight
        ydat.append(interp(x, y, xdat, kind=interp_kind))
    return xdat, np.array(ydat)
def lincombo_fit(group, components, weights=None, minvals=None,
                 maxvals=None, arrayname='norm', xmin=-np.inf, xmax=np.inf,
                 sum_to_one=True, vary_e0=False, max_ncomps=None):

    """perform linear combination fitting for a group

    Arguments
    ---------
      group       Group to be fitted
      components  List of groups to use as components (see Note 1)
      weights     array of starting weights (see Note 2)
      minvals     array of min weights (or None to mean -inf)
      maxvals     array of max weights (or None to mean +inf)
      arrayname   string of array name to be fit (see Note 3) ['norm']
      xmin        x-value for start of fit range [-inf]
      xmax        x-value for end of fit range [+inf]
      sum_to_one  bool, whether to force weights to sum to 1.0 [True]
      vary_e0     bool, whether to vary e0 for data in fit [False]
      max_ncomps  not used by this function

    Returns
    -------
      group with resulting weights and fit statistics

    Notes
    -----
     1.  The names of Group members for the components must match those of the
         group to be fitted.
     2.  use `None` to use basic linear algebra solution.
     3.  arrayname is expected to be one of `norm`, `mu`, `dmude`, or `chi`.
         It can be some other name but such named arrays should exist for all
         components and groups.
     4.  `weights`, `minvals` and `maxvals` may be lists or numpy arrays; a
         value of None or a sequence holding only None means "not given".
    """
    def _unset(vals):
        # identity tests only: `vals in (None, [None]*n)` compares with ==,
        # which is elementwise for numpy arrays and raises on the resulting
        # ambiguous truth value
        return vals is None or all(v is None for v in vals)

    # first, gather components
    ncomps = len(components)
    allgroups = [group]
    allgroups.extend(components)
    xdat, yall = groups2matrix(allgroups, yname=arrayname,
                               xname='energy', xmin=xmin, xmax=xmax)

    ydat = yall[0, :]                  # data to fit: first row
    ycomps = yall[1:, :].transpose()   # components as columns: (npts, ncomps)

    # second use unconstrained linear algebra to estimate weights
    ls_out = np.linalg.lstsq(ycomps, ydat, rcond=-1)
    ls_vals = ls_out[0]

    # third use lmfit, imposing bounds and sum_to_one constraint
    if _unset(weights):
        weights = ls_vals
    if _unset(minvals):
        minvals = -np.inf * np.ones(ncomps)
    if _unset(maxvals):
        maxvals = np.inf * np.ones(ncomps)

    def lincombo_resid(params, xdata, ydata, ycomps):
        # residual = (weighted sum of components) - (data, optionally
        # re-interpolated at the shifted x values)
        npts, ncomps = ycomps.shape
        if params['e0_shift'].vary:
            y = interp(xdata, ydata, xdata+params['e0_shift'].value, kind='cubic')
        else:
            y = ydata*1.0
        resid = -y
        for i in range(ncomps):
            resid += ycomps[:, i] * params['c%i' % i].value
        return resid

    params = lmfit.Parameters()
    e0_val = 0.01 if vary_e0 else 0.
    params.add('e0_shift', value=e0_val, vary=vary_e0)
    for i in range(ncomps):
        params.add('c%i' % i, value=weights[i], min=minvals[i], max=maxvals[i])

    if sum_to_one:
        # constrain the last weight to 1 minus all the others
        expr = ['1'] + ['c%i' % i for i in range(ncomps-1)]
        params['c%i' % (ncomps-1)].expr = '-'.join(expr)

    expr = ['c%i' % i for i in range(ncomps)]
    params.add('total', expr='+'.join(expr))

    result = lmfit.minimize(lincombo_resid, params, args=(xdat, ydat, ycomps))

    # gather results, keyed by component label
    out_weights, weights_lstsq = {}, {}
    out_params, fcomps = {}, {}
    out_params['e0_shift'] = copy.deepcopy(result.params['e0_shift'])
    for i in range(ncomps):
        label = get_label(components[i])
        cpar = result.params['c%i' % i]
        out_weights[label] = cpar.value
        out_params[label] = copy.deepcopy(cpar)
        weights_lstsq[label] = ls_vals[i]
        fcomps[label] = ycomps[:, i] * cpar.value

    if 'total' in result.params:
        out_params['total'] = copy.deepcopy(result.params['total'])

    npts, ncomps = ycomps.shape
    yfit = np.zeros(npts)
    for i in range(ncomps):
        yfit += ycomps[:, i] * result.params['c%i' % i].value
    if out_params['e0_shift'].vary:
        # the residual shifted the data; shift the model the other way so
        # that yfit is comparable with the unshifted ydat
        yfit = interp(xdat+out_params['e0_shift'].value, yfit, xdat, kind='cubic')
    rfactor = ((ydat-yfit)**2).sum() / (ydat**2).sum()
    return Group(result=result, chisqr=result.chisqr, redchi=result.redchi,
                 params=out_params, weights=out_weights,
                 weights_lstsq=weights_lstsq,
                 xdata=xdat, ydata=ydat, yfit=yfit, ycomps=fcomps,
                 arrayname=arrayname, rfactor=rfactor,
                 xmin=xmin, xmax=xmax)
def lincombo_fitall(group, components, weights=None, minvals=None, maxvals=None,
                    arrayname='norm', xmin=-np.inf, xmax=np.inf,
                    max_ncomps=None, sum_to_one=True, vary_e0=False,
                    min_weight=0.0005, max_output=16):
    """perform linear combination fittings for a group with all combinations
    of 2 or more of the components given

    Arguments
    ---------
      group       Group to be fitted
      components  List of groups to use as components (see Note 1)
      weights     array of starting weights (see Note 3)
      minvals     array of min weights (or None to mean -inf)
      maxvals     array of max weights (or None to mean +inf)
      arrayname   string of array name to be fit (see Note 2)
      xmin        x-value for start of fit range [-inf]
      xmax        x-value for end of fit range [+inf]
      sum_to_one  bool, whether to force weights to sum to 1.0 [True]
      max_ncomps  int or None: max number of components to use [None -> all]
      vary_e0     bool, whether to vary e0 for data in fit [False]
      min_weight  float, minimum weight for each component to save result [0.0005]
      max_output  int, max number of outputs, sorted by reduced chi-square [16]

    Returns
    -------
     list of groups with resulting weights and fit statistics, ordered by
         reduced chi-square (best first)

    Notes
    -----
     1.  The names of Group members for the components must match those of the
         group to be fitted.
     2.  arrayname can be one of `norm` or `dmude`
     3.  `weights`, `minvals` and `maxvals` may be lists or numpy arrays; a
         value of None or a sequence holding only None means "not given".
         Every individual fit is started from equal weights 1/n for its n
         components.
     4.  A fit is dropped when its set of significant components (those with
         weight > min_weight) matches that of a fit already kept.
    """
    def _unset(vals):
        # identity tests only: `vals in (None, [None]*n)` compares with ==,
        # which is elementwise for numpy arrays and raises on the resulting
        # ambiguous truth value
        return vals is None or all(v is None for v in vals)

    ncomps = len(components)

    if _unset(weights):
        weights = [None]*ncomps
    if _unset(minvals):
        minvals = -np.inf * np.ones(ncomps)
    if _unset(maxvals):
        maxvals = np.inf * np.ones(ncomps)

    # here we save the inputs weights and bounds for each component by name
    # so they can be imposed for the individual fits
    # NOTE(review): the saved starting weights are never read below; only the
    # bounds are used -- confirm whether that is intended
    _save = {get_label(comp): (weights[i], minvals[i], maxvals[i])
             for i, comp in enumerate(components)}

    if max_ncomps is None:
        max_ncomps = ncomps
    elif max_ncomps > 0:
        max_ncomps = int(min(max_ncomps, ncomps))

    out = []
    comps_kept = []
    for nx in range(2, int(max_ncomps)+1):
        for comps in combinations(components, nx):
            labs = [get_label(c) for c in comps]
            _wts = [1.0/nx for lab in labs]
            _min = [_save[lab][1] for lab in labs]
            _max = [_save[lab][2] for lab in labs]

            ret = lincombo_fit(group, comps, weights=_wts,
                               arrayname=arrayname, minvals=_min,
                               maxvals=_max, xmin=xmin, xmax=xmax,
                               sum_to_one=sum_to_one, vary_e0=vary_e0)

            # sorted labels of the components that actually contribute
            _sig_comps = sorted(key for key, wt in ret.weights.items()
                                if wt > min_weight)
            if _sig_comps not in comps_kept:
                comps_kept.append(_sig_comps)
                out.append(ret)

    # sort outputs by reduced chi-square
    return sorted(out, key=lambda x: x.redchi)[:max_output]