Package csb :: Package bio :: Package utils
[frames] | no frames]

Source Code for Package csb.bio.utils

  1  """ 
  2  Computational utility functions.  
  3   
  4  This module defines a number of low-level, numerical, high-performance utility 
  5  functions like L{rmsd} for example. 
  6  """ 
  7   
  8  import numpy 
  9  import numpy.random 
 10   
 11  import csb.numeric 
 12   
 13   
14 -def fit(X, Y):
15 """ 16 Return the translation vector and the rotation matrix 17 minimizing the RMSD between two sets of vectors, i.e. 18 if 19 20 >>> R,t = fit(X,Y) 21 22 then 23 24 >>> Y = dot(Y, transpose(R)) + t 25 26 will be the fitted configuration. 27 28 @param X: 3 x n input vector 29 @type X: numpy array 30 31 @param Y: 3 x n input vector 32 @type Y: numpy array 33 34 @return: 3 x 3 rotation matrix and 3 x 1 translation vector 35 @rtype: tuple 36 """ 37 38 from numpy.linalg import svd, det 39 from numpy import dot 40 41 ## center configurations 42 43 x = X.mean(0) 44 y = Y.mean(0) 45 46 ## SVD of correlation matrix 47 48 V, _L, U = svd(dot((X - x).T, Y - y)) 49 50 ## calculate rotation and translation 51 52 R = dot(V, U) 53 54 if det(R) < 0.: 55 U[-1] *= -1 56 R = dot(V, U) 57 58 t = x - dot(R, y) 59 60 return R, t
61
62 -def wfit(X, Y, w):
63 """ 64 Return the translation vector and the rotation matrix 65 minimizing the weighted RMSD between two sets of vectors, i.e. 66 if 67 68 >>> R,t = wfit(X,Y,w) 69 70 then 71 72 >>> Y = dot(Y, transpose(R)) + t 73 74 will be the fitted configuration. 75 76 @param X: 3 x n input vector 77 @type X: numpy array 78 79 @param Y: 3 x n input vector 80 @type Y: numpy array 81 82 @param w: input weights 83 @type w: numpy array 84 85 @return: 3 x 3 rotation matrix and 3 x 1 translation vector 86 @rtype: tuple 87 """ 88 89 from numpy.linalg import svd, det 90 from numpy import dot, transpose, average 91 92 ## center configurations 93 94 norm = sum(w) 95 x = dot(w, X) / norm 96 y = dot(w, Y) / norm 97 98 ## SVD of correlation matrix 99 100 V, _L, U = svd(dot(transpose(X - x) * w, Y - y)) 101 102 ## calculate rotation and translation 103 104 R = dot(V, U) 105 106 if det(R) < 0.: 107 U[2] *= -1 108 R = dot(V, U) 109 110 t = x - dot(R, y) 111 112 return R, t
113
114 -def probabilistic_fit(X, Y, w=None, niter=10):
115 """ 116 Generate a superposition of X,Y where:: 117 118 R ~ exp(trace(dot(transpose(dot(transpose(X-t), Y)), R))) 119 t ~ N(t_opt, 1 / sqrt(N)) 120 """ 121 122 from csb.statistics.rand import random_rotation 123 from numpy import dot, transpose, average 124 125 if w is None: 126 R, t = fit(X, Y) 127 else: 128 R, t = wfit(X, Y, w) 129 130 N = len(X) 131 132 for i in range(niter): 133 ## sample rotation 134 if w is None: 135 A = dot(transpose(X - t), Y) 136 else: 137 A = dot(transpose(X - t) * w, Y) 138 139 R = random_rotation(A) 140 141 ## sample translation (without prior so far) 142 if w is None: 143 mu = average(X - dot(Y, transpose(R)), 0) 144 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(N) + mu 145 else: 146 mu = dot(w, X - dot(Y, transpose(R))) / numpy.sum(w) 147 t = numpy.random.standard_normal(len(mu)) / numpy.sqrt(numpy.sum(w)) + mu 148 149 return R, t
150
151 -def fit_wellordered(X, Y, n_iter=None, n_stdv=2, tol_rmsd=.5, 152 tol_stdv=0.05, full_output=False):
153 """ 154 Matche two arrays onto each other by iteratively throwing out 155 highly deviating entries. 156 157 (Reference: Nilges et al.: Delineating well-ordered regions in 158 protein structure ensembles). 159 160 @param X: 3 x n input vector 161 @type X: numpy array 162 163 @param Y: 3 x n input vector 164 @type Y: numpy array 165 166 @param n_stdv: number of standard deviations above which points are considered to be outliers 167 @param tol_rmsd: tolerance in rmsd 168 @param tol_stdv: tolerance in standard deviations 169 @param full_output: also return full history of values calculated by the algorithm 170 """ 171 172 from numpy import ones, compress, dot, transpose, sqrt, sum, nonzero, std, average 173 174 175 rmsd_list = [] 176 177 rmsd_old = 0. 178 stdv_old = 0. 179 180 n = 0 181 converged = False 182 183 mask = ones(X.shape[0]) 184 185 while not converged: 186 187 ## find transformation for best match 188 189 R, t = fit(compress(mask, X, 0), compress(mask, Y, 0)) 190 191 ## calculate RMSD profile 192 193 d = sqrt(sum((X - dot(Y, transpose(R)) - t) ** 2, 1)) 194 195 ## calculate rmsd and stdv 196 197 rmsd = sqrt(average(compress(mask, d) ** 2, 0)) 198 stdv = std(compress(mask, d)) 199 200 ## check conditions for convergence 201 202 if stdv < 1e-10: break 203 204 d_rmsd = abs(rmsd - rmsd_old) 205 d_stdv = abs(1 - stdv_old / stdv) 206 207 if d_rmsd < tol_rmsd: 208 if d_stdv < tol_stdv: 209 converged = 1 210 else: 211 stdv_old = stdv 212 else: 213 rmsd_old = rmsd 214 stdv_old = stdv 215 216 ## store result 217 218 perc = average(1. * mask) 219 220 ## throw out non-matching rows 221 222 new_mask = mask * (d < rmsd + n_stdv * stdv) 223 outliers = nonzero(mask - new_mask) 224 rmsd_list.append([perc, rmsd, outliers]) 225 226 mask = new_mask 227 228 if n_iter and n >= n_iter: break 229 230 n += 1 231 232 if full_output: 233 return (R, t), rmsd_list 234 else: 235 return (R, t)
236
237 -def rmsd(X, Y):
238 """ 239 Calculate the root mean squared deviation (RMSD) using Kabsch' formula. 240 241 @param X: 3 x n input vector 242 @type X: numpy array 243 @param Y: 3 x n input vector 244 @type Y: numpy array 245 246 @return: rmsd value between the input vectors 247 @rtype: float 248 """ 249 250 from numpy import sum, dot, transpose, sqrt, clip, average 251 from numpy.linalg import svd 252 253 X = X - average(X, 0) 254 Y = Y - average(Y, 0) 255 256 R_x = sum(X ** 2) 257 R_y = sum(Y ** 2) 258 259 L = svd(dot(transpose(Y), X))[1] 260 261 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300) / len(X))
262
263 -def rmsd_cur(X, Y):
264 """ 265 Calculate the RMSD of two conformations as they are (no fitting is done). 266 For details, see L{rmsd}. 267 268 @return: rmsd value between the input vectors 269 @rtype: float 270 """ 271 return distance_sq(X, Y).mean() ** 0.5
272
273 -def wrmsd(X, Y, w):
274 """ 275 Calculate the weighted root mean squared deviation (wRMSD) using Kabsch' 276 formula. 277 278 @param X: 3 x n input vector 279 @type X: numpy array 280 @param Y: 3 x n input vector 281 @type Y: numpy array 282 @param w: input weights 283 @type w: numpy array 284 285 @return: rmsd value between the input vectors 286 @rtype: float 287 """ 288 289 from numpy import sum, dot, transpose, sqrt, clip, average 290 from numpy.linalg import svd 291 292 ## normalize weights 293 294 w = w / w.sum() 295 296 X = X - dot(w, X) 297 Y = Y - dot(w, Y) 298 299 R_x = sum(X.T ** 2 * w) 300 R_y = sum(Y.T ** 2 * w) 301 302 L = svd(dot(transpose(Y) * w, X))[1] 303 304 return sqrt(clip(R_x + R_y - 2 * sum(L), 0., 1e300))
305
306 -def torsion_rmsd(x, y):
307 """ 308 Compute the circular RMSD of two phi/psi angle sets. 309 310 @param x: query phi/psi angles (Nx2 array, in radians) 311 @type x: array 312 @param y: subject phi/psi angles (Nx2 array, in radians) 313 @type y: array 314 315 @rtype: float 316 """ 317 from numpy import array, sin, cos, sqrt 318 319 phi, psi = (x - y).T 320 assert len(phi) == len(psi) 321 322 r = sin(phi).sum() ** 2 + cos(phi).sum() ** 2 + sin(psi).sum() ** 2 + cos(psi).sum() ** 2 323 return 1 - (1.0 / len(phi)) * sqrt(r / 2.0)
324
325 -def _tm_d0(Lmin):
326 327 from numpy import power 328 329 if Lmin > 15: 330 d0 = 1.24 * power(Lmin - 15.0, 1.0 / 3.0) - 1.8 331 else: 332 d0 = 0.5 333 334 return max(0.5, d0)
335
336 -def tm_score(x, y, L=None, d0=None):
337 """ 338 Evaluate the TM-score of two conformations as they are (no fitting is done). 339 340 @param x: 3 x n input array 341 @type x: numpy array 342 @param y: 3 x n input array 343 @type y: numpy array 344 @param L: length for normalization (default: C{len(x)}) 345 @type L: int 346 @param d0: d0 in Angstroms (default: calculate from C{L}) 347 @type d0: float 348 349 @return: computed TM-score 350 @rtype: float 351 """ 352 from numpy import sum 353 354 if not L: 355 L = len(x) 356 if not d0: 357 d0 = _tm_d0(L) 358 d = distance(x, y) 359 360 return sum(1 / (1 + (d / d0) ** 2)) / L
361
362 -def tm_superimpose(x, y, fit_method=fit, L=None, d0=None, L_ini_min=4, iL_step=1):
363 """ 364 Compute the TM-score of two protein coordinate vector sets. 365 Reference: http://zhanglab.ccmb.med.umich.edu/TM-score 366 367 @param x: 3 x n input vector 368 @type x: numpy.array 369 @param y: 3 x n input vector 370 @type y: numpy.array 371 @param fit_method: a reference to a proper fitting function, e.g. fit 372 or fit_wellordered 373 @type fit_method: function 374 @param L: length for normalization (default: C{len(x)}) 375 @type L: int 376 @param d0: d0 in Angstroms (default: calculate from C{L}) 377 @type d0: float 378 @param L_ini_min: minimum length of initial alignment window (increase 379 to speed up but loose precision, a value of 0 disables local alignment 380 initialization) 381 @type L_ini_min: int 382 @param iL_step: initial alignment window shift (increase to speed up 383 but loose precision) 384 @type iL_step: int 385 386 @return: rotation matrix, translation vector, TM-score 387 @rtype: tuple 388 """ 389 from numpy import asarray, sum, dot, zeros, clip 390 391 x, y = asarray(x), asarray(y) 392 if not L: 393 L = len(x) 394 if not d0: 395 d0 = _tm_d0(L) 396 d0_search = clip(d0, 4.5, 8.0) 397 best = None, None, 0.0 398 399 L_ini_min = min(L, L_ini_min) if L_ini_min else L 400 L_ini = [L_ini_min] + list(filter(lambda x: x > L_ini_min, 401 [L // (2 ** n_init) for n_init in range(6)])) 402 403 # the outer two loops define a sliding window of different sizes for the 404 # initial local alignment (disabled with L_ini_min=0) 405 for L_init in L_ini: 406 for iL in range(0, L - L_init + 1, min(L_init, iL_step)): 407 mask = zeros(L, bool) 408 mask[iL:iL + L_init] = True 409 410 # refine mask until convergence, similar to fit_wellordered 411 for i in range(20): 412 R, t = fit_method(x[mask], y[mask]) 413 414 d = distance(x, dot(y, R.T) + t) 415 score = sum(1 / (1 + (d / d0) ** 2)) / L 416 417 if score > best[-1]: 418 best = R, t, score 419 420 mask_prev = mask 421 cutoff = d0_search + (-1 if i == 0 else 1) 422 while True: 423 mask = d < cutoff 424 if sum(mask) >= 3 or 3 >= len(mask): 425 break 426 cutoff += 0.5 427 428 if (mask == mask_prev).all(): 429 break 430 431 return best
432
433 -def center_of_mass(x, m=None):
434 """ 435 Compute the mean of a set of (optionally weighted) points. 436 437 @param x: array of rank (n,d) where n is the number of points 438 and d the dimension 439 @type x: numpy.array 440 @param m: rank (n,) array of masses / weights 441 @type m: numpy.array 442 443 @return: center of mass 444 @rtype: (d,) numpy.array 445 """ 446 if m is None: 447 return x.mean(0) 448 else: 449 from numpy import dot 450 451 return dot(m, x) / m.sum()
452
453 -def radius_of_gyration(x, m=None):
454 """ 455 Compute the radius of gyration of a set of (optionally weighted) points. 456 457 @param x: array of rank (n,d) where n is the number of points 458 and d the dimension 459 @type x: numpy.array 460 @param m: rank (n,) array of masses / weights 461 @type m: numpy.array 462 463 @return: center of mass 464 @rtype: (d,) numpy.array 465 """ 466 from numpy import sqrt, dot 467 468 x = x - center_of_mass(x, m) 469 if m is None: 470 return sqrt((x ** 2).sum() / len(x)) 471 else: 472 return sqrt(dot(m, (x ** 2).sum(1)) / m.sum())
473
474 -def second_moments(x, m=None):
475 """ 476 Compute the tensor second moments of a set of (optionally weighted) points. 477 478 @param x: array of rank (n,d) where n is the number of points 479 and d the dimension 480 @type x: numpy.array 481 @param m: rank (n,) array of masses / weights 482 @type m: numpy.array 483 484 @return: second moments 485 @rtype: (d,d) numpy.array 486 """ 487 from numpy import dot 488 489 x = (x - center_of_mass(x, m)).T 490 491 if m is not None: 492 return dot(x * m, x.T) 493 else: 494 return dot(x, x.T)
495
496 -def inertia_tensor(x, m=None):
497 """ 498 Compute the inertia tensor of a set of (optionally weighted) points. 499 500 @param x: array of rank (n,d) where n is the number of points 501 and d the dimension 502 @type x: numpy.array 503 @param m: rank (n,) array of masses / weights 504 @type m: numpy.array 505 506 @return: inertia tensor 507 @rtype: (d,d) numpy.array 508 """ 509 from numpy import dot, eye 510 511 x = (x - center_of_mass(x, m)).T 512 r2 = (x ** 2).sum(0) 513 514 if m is not None: 515 return eye(x.shape[0]) * dot(m, r2) - dot(x * m, x.T) 516 else: 517 return eye(x.shape[0]) * r2.sum() - dot(x, x.T)
518
519 -def distance_matrix(X, Y=None):
520 """ 521 Calculates a matrix of pairwise distances 522 523 @param X: m x n input vector 524 @type X: numpy array 525 526 @param Y: k x n input vector or None, which defaults to Y=X 527 @type Y: numpy array 528 529 @return: m x k distance matrix 530 @rtype: numpy array 531 """ 532 from numpy import add, clip, sqrt, dot, transpose, sum 533 534 if Y is None: Y = X 535 536 if X.ndim < 2: X = X.reshape((1, -1)) 537 if Y.ndim < 2: Y = Y.reshape((1, -1)) 538 539 C = dot(X, transpose(Y)) 540 S = add.outer(sum(X ** 2, 1), sum(Y ** 2, 1)) 541 542 return sqrt(clip(S - 2 * C, 0., 1e300))
543
544 -def distance_sq(X, Y):
545 """ 546 Squared distance between C{X} and C{Y} along the last axis. For details, see L{distance}. 547 548 @return: scalar or array of length m 549 @rtype: (m,) numpy.array 550 """ 551 return ((numpy.asarray(X) - Y) ** 2).sum(-1)
552
553 -def distance(X, Y):
554 """ 555 Distance between C{X} and C{Y} along the last axis. 556 557 @param X: m x n input vector 558 @type X: numpy array 559 @param Y: m x n input vector 560 @type Y: numpy array 561 @return: scalar or array of length m 562 @rtype: (m,) numpy.array 563 """ 564 return distance_sq(X, Y) ** 0.5
565
566 -def average_structure(X):
567 """ 568 Calculate an average structure from an ensemble of structures 569 (i.e. X is a rank-3 tensor: X[i] is a (N,3) configuration matrix). 570 571 @param X: m x n x 3 input vector 572 @type X: numpy array 573 574 @return: average structure 575 @rtype: (n,3) numpy.array 576 """ 577 from numpy.linalg import eigh 578 579 B = csb.numeric.gower_matrix(X) 580 v, U = eigh(B) 581 if numpy.iscomplex(v).any(): 582 v = v.real 583 if numpy.iscomplex(U).any(): 584 U = U.real 585 586 indices = numpy.argsort(v)[-3:] 587 v = numpy.take(v, indices, 0) 588 U = numpy.take(U, indices, 1) 589 590 x = U * numpy.sqrt(v) 591 i = 0 592 while is_mirror_image(x, X[0]) and i < 2: 593 x[:, i] *= -1 594 i += 1 595 return x
596 597
598 -def is_mirror_image(X, Y):
599 """ 600 Check if two configurations X and Y are mirror images 601 (i.e. their optimal superposition involves a reflection). 602 603 @param X: n x 3 input vector 604 @type X: numpy array 605 @param Y: n x 3 input vector 606 @type Y: numpy array 607 608 @rtype: bool 609 """ 610 from numpy.linalg import det, svd 611 612 ## center configurations 613 614 X = X - numpy.mean(X, 0) 615 Y = Y - numpy.mean(Y, 0) 616 617 ## SVD of correlation matrix 618 619 V, L, U = svd(numpy.dot(numpy.transpose(X), Y)) #@UnusedVariable 620 621 R = numpy.dot(V, U) 622 623 return det(R) < 0
624 625 # vi:expandtab:smarttab:sw=4 626