1 """
2 Computational utility functions.
3
4 This module defines a number of low-level, numerical, high-performance utility
5 functions like L{rmsd} for example.
6 """
7
8 import numpy
9 import numpy.random
10
11 import csb.numeric
12
13
15 """
16 Return the translation vector and the rotation matrix
17 minimizing the RMSD between two sets of vectors, i.e.
18 if
19
20 >>> R,t = fit(X,Y)
21
22 then
23
24 >>> Y = dot(Y, transpose(R)) + t
25
26 will be the fitted configuration.
27
28 @param X: 3 x n input vector
29 @type X: numpy array
30
31 @param Y: 3 x n input vector
32 @type Y: numpy array
33
34 @return: 3 x 3 rotation matrix and 3 x 1 translation vector
35 @rtype: tuple
36 """
37
38 from numpy.linalg import svd, det
39 from numpy import dot
40
41
42
43 x = X.mean(0)
44 y = Y.mean(0)
45
46
47
48 V, _L, U = svd(dot((X - x).T, Y - y))
49
50
51
52 R = dot(V, U)
53
54 if det(R) < 0.:
55 U[-1] *= -1
56 R = dot(V, U)
57
58 t = x - dot(R, y)
59
60 return R, t
61
63 """
64 Return the translation vector and the rotation matrix
65 minimizing the weighted RMSD between two sets of vectors, i.e.
66 if
67
68 >>> R,t = wfit(X,Y,w)
69
70 then
71
72 >>> Y = dot(Y, transpose(R)) + t
73
74 will be the fitted configuration.
75
76 @param X: 3 x n input vector
77 @type X: numpy array
78
79 @param Y: 3 x n input vector
80 @type Y: numpy array
81
82 @param w: input weights
83 @type w: numpy array
84
85 @return: 3 x 3 rotation matrix and 3 x 1 translation vector
86 @rtype: tuple
87 """
88
89 from numpy.linalg import svd, det
90 from numpy import dot, transpose, average
91
92
93
94 norm = sum(w)
95 x = dot(w, X) / norm
96 y = dot(w, Y) / norm
97
98
99
100 V, _L, U = svd(dot(transpose(X - x) * w, Y - y))
101
102
103
104 R = dot(V, U)
105
106 if det(R) < 0.:
107 U[2] *= -1
108 R = dot(V, U)
109
110 t = x - dot(R, y)
111
112 return R, t
113
115 """
116 Generate a superposition of X,Y where::
117
118 R ~ exp(trace(dot(transpose(dot(transpose(X-t), Y)), R)))
119 t ~ N(t_opt, 1 / sqrt(N))
120 """
121
122 from csb.statistics.rand import random_rotation
123 from numpy import dot, transpose, average
124
125 if w is None:
126 R, t = fit(X, Y)
127 else:
128 R, t = wfit(X, Y, w)
129
130 N = len(X)
131
132 for i in range(niter):
133
134 if w is None:
135 A = dot(transpose(X - t), Y)
136 else:
137 A = dot(transpose(X - t) * w, Y)
138
139 R = random_rotation(A)
140
141
142 if w is None:
143 mu = average(X - dot(Y, transpose(R)), 0)
144 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(N) + mu
145 else:
146 mu = dot(w, X - dot(Y, transpose(R))) / numpy.sum(w)
147 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(numpy.sum(w)) + mu
148
149 return R, t
150
151 -def fit_wellordered(X, Y, n_iter=None, n_stdv=2, tol_rmsd=.5,
152 tol_stdv=0.05, full_output=False):
153 """
154 Matche two arrays onto each other by iteratively throwing out
155 highly deviating entries.
156
157 (Reference: Nilges et al.: Delineating well-ordered regions in
158 protein structure ensembles).
159
160 @param X: 3 x n input vector
161 @type X: numpy array
162
163 @param Y: 3 x n input vector
164 @type Y: numpy array
165
166 @param n_stdv: number of standard deviations above which points are considered to be outliers
167 @param tol_rmsd: tolerance in rmsd
168 @param tol_stdv: tolerance in standard deviations
169 @param full_output: also return full history of values calculated by the algorithm
170 """
171
172 from numpy import ones, compress, dot, transpose, sqrt, sum, nonzero, std, average
173
174
175 rmsd_list = []
176
177 rmsd_old = 0.
178 stdv_old = 0.
179
180 n = 0
181 converged = False
182
183 mask = ones(X.shape[0])
184
185 while not converged:
186
187
188
189 R, t = fit(compress(mask, X, 0), compress(mask, Y, 0))
190
191
192
193 d = sqrt(sum((X - dot(Y, transpose(R)) - t) ** 2, 1))
194
195
196
197 rmsd = sqrt(average(compress(mask, d) ** 2, 0))
198 stdv = std(compress(mask, d))
199
200
201
202 if stdv < 1e-10: break
203
204 d_rmsd = abs(rmsd - rmsd_old)
205 d_stdv = abs(1 - stdv_old / stdv)
206
207 if d_rmsd < tol_rmsd:
208 if d_stdv < tol_stdv:
209 converged = 1
210 else:
211 stdv_old = stdv
212 else:
213 rmsd_old = rmsd
214 stdv_old = stdv
215
216
217
218 perc = average(1. * mask)
219
220
221
222 new_mask = mask * (d < rmsd + n_stdv * stdv)
223 outliers = nonzero(mask - new_mask)
224 rmsd_list.append([perc, rmsd, outliers])
225
226 mask = new_mask
227
228 if n_iter and n >= n_iter: break
229
230 n += 1
231
232 if full_output:
233 return (R, t), rmsd_list
234 else:
235 return (R, t)
236
238 """
239 Calculate the root mean squared deviation (RMSD) using Kabsch' formula.
240
241 @param X: 3 x n input vector
242 @type X: numpy array
243 @param Y: 3 x n input vector
244 @type Y: numpy array
245
246 @return: rmsd value between the input vectors
247 @rtype: float
248 """
249
250 from numpy import sum, dot, transpose, sqrt, clip, average
251 from numpy.linalg import svd
252
253 X = X - average(X, 0)
254 Y = Y - average(Y, 0)
255
256 R_x = sum(X ** 2)
257 R_y = sum(Y ** 2)
258
259 L = svd(dot(transpose(Y), X))[1]
260
261 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300) / len(X))
262
264 """
265 Calculate the RMSD of two conformations as they are (no fitting is done).
266 For details, see L{rmsd}.
267
268 @return: rmsd value between the input vectors
269 @rtype: float
270 """
271 return distance_sq(X, Y).mean() ** 0.5
272
274 """
275 Calculate the weighted root mean squared deviation (wRMSD) using Kabsch'
276 formula.
277
278 @param X: 3 x n input vector
279 @type X: numpy array
280 @param Y: 3 x n input vector
281 @type Y: numpy array
282 @param w: input weights
283 @type w: numpy array
284
285 @return: rmsd value between the input vectors
286 @rtype: float
287 """
288
289 from numpy import sum, dot, transpose, sqrt, clip, average
290 from numpy.linalg import svd
291
292
293
294 w = w / w.sum()
295
296 X = X - dot(w, X)
297 Y = Y - dot(w, Y)
298
299 R_x = sum(X.T ** 2 * w)
300 R_y = sum(Y.T ** 2 * w)
301
302 L = svd(dot(transpose(Y) * w, X))[1]
303
304 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300))
305
307 """
308 Compute the circular RMSD of two phi/psi angle sets.
309
310 @param x: query phi/psi angles (Nx2 array, in radians)
311 @type x: array
312 @param y: subject phi/psi angles (Nx2 array, in radians)
313 @type y: array
314
315 @rtype: float
316 """
317 from numpy import array, sin, cos, sqrt
318
319 phi, psi = (x - y).T
320 assert len(phi) == len(psi)
321
322 r = sin(phi).sum() ** 2 + cos(phi).sum() ** 2 + sin(psi).sum() ** 2 + cos(psi).sum() ** 2
323 return 1 - (1.0 / len(phi)) * sqrt(r / 2.0)
324
326
327 from numpy import power
328
329 if Lmin > 15:
330 d0 = 1.24 * power(Lmin - 15.0, 1.0 / 3.0) - 1.8
331 else:
332 d0 = 0.5
333
334 return max(0.5, d0)
335
337 """
338 Evaluate the TM-score of two conformations as they are (no fitting is done).
339
340 @param x: 3 x n input array
341 @type x: numpy array
342 @param y: 3 x n input array
343 @type y: numpy array
344 @param L: length for normalization (default: C{len(x)})
345 @type L: int
346 @param d0: d0 in Angstroms (default: calculate from C{L})
347 @type d0: float
348
349 @return: computed TM-score
350 @rtype: float
351 """
352 from numpy import sum
353
354 if not L:
355 L = len(x)
356 if not d0:
357 d0 = _tm_d0(L)
358 d = distance(x, y)
359
360 return sum(1 / (1 + (d / d0) ** 2)) / L
361
363 """
364 Compute the TM-score of two protein coordinate vector sets.
365 Reference: http://zhanglab.ccmb.med.umich.edu/TM-score
366
367 @param x: 3 x n input vector
368 @type x: numpy.array
369 @param y: 3 x n input vector
370 @type y: numpy.array
371 @param fit_method: a reference to a proper fitting function, e.g. fit
372 or fit_wellordered
373 @type fit_method: function
374 @param L: length for normalization (default: C{len(x)})
375 @type L: int
376 @param d0: d0 in Angstroms (default: calculate from C{L})
377 @type d0: float
378 @param L_ini_min: minimum length of initial alignment window (increase
379 to speed up but loose precision, a value of 0 disables local alignment
380 initialization)
381 @type L_ini_min: int
382 @param iL_step: initial alignment window shift (increase to speed up
383 but loose precision)
384 @type iL_step: int
385
386 @return: rotation matrix, translation vector, TM-score
387 @rtype: tuple
388 """
389 from numpy import asarray, sum, dot, zeros, clip
390
391 x, y = asarray(x), asarray(y)
392 if not L:
393 L = len(x)
394 if not d0:
395 d0 = _tm_d0(L)
396 d0_search = clip(d0, 4.5, 8.0)
397 best = None, None, 0.0
398
399 L_ini_min = min(L, L_ini_min) if L_ini_min else L
400 L_ini = [L_ini_min] + list(filter(lambda x: x > L_ini_min,
401 [L // (2 ** n_init) for n_init in range(6)]))
402
403
404
405 for L_init in L_ini:
406 for iL in range(0, L - L_init + 1, min(L_init, iL_step)):
407 mask = zeros(L, bool)
408 mask[iL:iL + L_init] = True
409
410
411 for i in range(20):
412 R, t = fit_method(x[mask], y[mask])
413
414 d = distance(x, dot(y, R.T) + t)
415 score = sum(1 / (1 + (d / d0) ** 2)) / L
416
417 if score > best[-1]:
418 best = R, t, score
419
420 mask_prev = mask
421 cutoff = d0_search + (-1 if i == 0 else 1)
422 while True:
423 mask = d < cutoff
424 if sum(mask) >= 3 or 3 >= len(mask):
425 break
426 cutoff += 0.5
427
428 if (mask == mask_prev).all():
429 break
430
431 return best
432
434 """
435 Compute the mean of a set of (optionally weighted) points.
436
437 @param x: array of rank (n,d) where n is the number of points
438 and d the dimension
439 @type x: numpy.array
440 @param m: rank (n,) array of masses / weights
441 @type m: numpy.array
442
443 @return: center of mass
444 @rtype: (d,) numpy.array
445 """
446 if m is None:
447 return x.mean(0)
448 else:
449 from numpy import dot
450
451 return dot(m, x) / m.sum()
452
454 """
455 Compute the radius of gyration of a set of (optionally weighted) points.
456
457 @param x: array of rank (n,d) where n is the number of points
458 and d the dimension
459 @type x: numpy.array
460 @param m: rank (n,) array of masses / weights
461 @type m: numpy.array
462
463 @return: center of mass
464 @rtype: (d,) numpy.array
465 """
466 from numpy import sqrt, dot
467
468 x = x - center_of_mass(x, m)
469 if m is None:
470 return sqrt((x ** 2).sum() / len(x))
471 else:
472 return sqrt(dot(m, (x ** 2).sum(1)) / m.sum())
473
475 """
476 Compute the tensor second moments of a set of (optionally weighted) points.
477
478 @param x: array of rank (n,d) where n is the number of points
479 and d the dimension
480 @type x: numpy.array
481 @param m: rank (n,) array of masses / weights
482 @type m: numpy.array
483
484 @return: second moments
485 @rtype: (d,d) numpy.array
486 """
487 from numpy import dot
488
489 x = (x - center_of_mass(x, m)).T
490
491 if m is not None:
492 return dot(x * m, x.T)
493 else:
494 return dot(x, x.T)
495
497 """
498 Compute the inertia tensor of a set of (optionally weighted) points.
499
500 @param x: array of rank (n,d) where n is the number of points
501 and d the dimension
502 @type x: numpy.array
503 @param m: rank (n,) array of masses / weights
504 @type m: numpy.array
505
506 @return: inertia tensor
507 @rtype: (d,d) numpy.array
508 """
509 from numpy import dot, eye
510
511 x = (x - center_of_mass(x, m)).T
512 r2 = (x ** 2).sum(0)
513
514 if m is not None:
515 return eye(x.shape[0]) * dot(m, r2) - dot(x * m, x.T)
516 else:
517 return eye(x.shape[0]) * r2.sum() - dot(x, x.T)
518
520 """
521 Calculates a matrix of pairwise distances
522
523 @param X: m x n input vector
524 @type X: numpy array
525
526 @param Y: k x n input vector or None, which defaults to Y=X
527 @type Y: numpy array
528
529 @return: m x k distance matrix
530 @rtype: numpy array
531 """
532 from numpy import add, clip, sqrt, dot, transpose, sum
533
534 if Y is None: Y = X
535
536 if X.ndim < 2: X = X.reshape((1, -1))
537 if Y.ndim < 2: Y = Y.reshape((1, -1))
538
539 C = dot(X, transpose(Y))
540 S = add.outer(sum(X ** 2, 1), sum(Y ** 2, 1))
541
542 return sqrt(clip(S - 2 * C, 0., 1e300))
543
545 """
546 Squared distance between C{X} and C{Y} along the last axis. For details, see L{distance}.
547
548 @return: scalar or array of length m
549 @rtype: (m,) numpy.array
550 """
551 return ((numpy.asarray(X) - Y) ** 2).sum(-1)
552
554 """
555 Distance between C{X} and C{Y} along the last axis.
556
557 @param X: m x n input vector
558 @type X: numpy array
559 @param Y: m x n input vector
560 @type Y: numpy array
561 @return: scalar or array of length m
562 @rtype: (m,) numpy.array
563 """
564 return distance_sq(X, Y) ** 0.5
565
567 """
568 Calculate an average structure from an ensemble of structures
569 (i.e. X is a rank-3 tensor: X[i] is a (N,3) configuration matrix).
570
571 @param X: m x n x 3 input vector
572 @type X: numpy array
573
574 @return: average structure
575 @rtype: (n,3) numpy.array
576 """
577 from numpy.linalg import eigh
578
579 B = csb.numeric.gower_matrix(X)
580 v, U = eigh(B)
581 if numpy.iscomplex(v).any():
582 v = v.real
583 if numpy.iscomplex(U).any():
584 U = U.real
585
586 indices = numpy.argsort(v)[-3:]
587 v = numpy.take(v, indices, 0)
588 U = numpy.take(U, indices, 1)
589
590 x = U * numpy.sqrt(v)
591 i = 0
592 while is_mirror_image(x, X[0]) and i < 2:
593 x[:, i] *= -1
594 i += 1
595 return x
596
597
599 """
600 Check if two configurations X and Y are mirror images
601 (i.e. their optimal superposition involves a reflection).
602
603 @param X: n x 3 input vector
604 @type X: numpy array
605 @param Y: n x 3 input vector
606 @type Y: numpy array
607
608 @rtype: bool
609 """
610 from numpy.linalg import det, svd
611
612
613
614 X = X - numpy.mean(X, 0)
615 Y = Y - numpy.mean(Y, 0)
616
617
618
619 V, L, U = svd(numpy.dot(numpy.transpose(X), Y))
620
621 R = numpy.dot(V, U)
622
623 return det(R) < 0
624
625
626