Source code for knockpy.mrc

""" Methods for minimum-reconstructability knockoffs."""

import warnings
import time
import numpy as np
import scipy as sp
from scipy import stats
from . import utilities, constants
try:
    import choldate
    CHOLDATE_AVAILABLE = True
except ImportError:
    CHOLDATE_AVAILABLE = False

def cholupdate(R, x, add=True):
    """
    Performs rank one updates to a cholesky factor `R` in place.

    Parameters
    ----------
    R : np.ndarray
        A ``(p,p)``-shaped upper-triangular matrix.
    x : np.ndarray
        A ``(p,)``-shaped vector.
    add : bool
        If True, performs a rank one update; else performs a
        rank one downdate.

    Returns
    -------
    R : np.ndarray
        Suppose the parameter R was a cholesky factor of a matrix V.
        Upon return, R is the cholesky factor of ``V +/- np.outer(x, x)``.

    Notes
    -----
    - This function modifies both ``R`` and ``x`` in place.
    - The ``choldate`` package is a much faster and more numerically
      stable alternative.
    """
    p = np.size(x)
    x = x.T
    for k in range(p):
        if add:
            r = np.sqrt(R[k, k] ** 2 + x[k] ** 2)
        else:
            r = np.sqrt(R[k, k] ** 2 - x[k] ** 2)
        c = r / R[k, k]
        s = x[k] / R[k, k]
        R[k, k] = r
        if add:
            R[k, k + 1 : p] = (R[k, k + 1 : p] + s * x[k + 1 : p]) / c
        else:
            R[k, k + 1 : p] = (R[k, k + 1 : p] - s * x[k + 1 : p]) / c
        x[k + 1 : p] = c * x[k + 1 : p] - s * R[k, k + 1 : p]
    return R
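# Illustrative usage sketch (added for exposition; not part of the original
# knockpy module). Checks the pure-python `cholupdate` against a fresh
# factorization of the updated matrix; the `_demo_cholupdate` name is
# hypothetical.
def _demo_cholupdate():
    rng = np.random.default_rng(0)
    A = rng.standard_normal((5, 5))
    V = np.dot(A, A.T) + 5 * np.eye(5)  # well-conditioned PSD matrix
    R = np.linalg.cholesky(V).T.copy()  # upper-triangular cholesky factor
    x = rng.standard_normal(5)
    expected = np.linalg.cholesky(V + np.outer(x, x)).T
    cholupdate(R, x.copy(), add=True)  # modifies R (and its x argument) in place
    assert np.allclose(R, expected)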
### Coordinate Descent Solvers
def mvr_loss(Sigma, S, smoothing=0):
    """
    Computes minimum variance-based reconstructability loss for
    knockoffs, e.g., the trace of the feature-knockoff precision matrix.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    S : np.ndarray
        ``(p, p)``-shaped S-matrix used to generate knockoffs
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical
        instability. Defaults to 0.

    Returns
    -------
    loss : float
        The MVR loss for Sigma and S. This is infinite if S is not feasible.
    """
    # Check for negative eigenvalues
    eigs_S = np.diag(S)
    eigs_diff = np.linalg.eigh(2 * Sigma - S)[0]
    if np.min(eigs_S) < 0 or np.min(eigs_diff) < 0:
        return np.inf
    # Inverse of eigenvalues
    trace_invG = (1 / (eigs_diff + smoothing)).sum() + (1 / (eigs_S + smoothing)).sum()
    return trace_invG
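# Illustrative check (added for exposition; not part of the original module).
# For diagonal S, the MVR loss equals the trace of the inverse of the full
# feature-knockoff Gram matrix G = [[Sigma, Sigma - S], [Sigma - S, Sigma]],
# since G is orthogonally similar to block-diag(2*Sigma - S, S). The AR(1)
# test matrix and `_demo_mvr_loss` name are assumptions for the sketch.
def _demo_mvr_loss():
    p = 4
    rho = 0.5
    Sigma = rho ** np.abs(np.subtract.outer(np.arange(p), np.arange(p)))
    S = 0.5 * np.eye(p)
    G = np.block([[Sigma, Sigma - S], [Sigma - S, Sigma]])
    assert np.isclose(mvr_loss(Sigma, S), np.trace(np.linalg.inv(G)))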
def maxent_loss(Sigma, S, smoothing=0):
    """
    Computes the log determinant of the feature-knockoff precision
    matrix, which is proportional to the negative entropy of [X, tilde{X}].

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    S : np.ndarray
        ``(p, p)``-shaped S-matrix used to generate knockoffs
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before taking the log determinant to avoid
        numerical instability. Defaults to 0.

    Returns
    -------
    loss : float
        The maxent loss for Sigma and S. This is infinite if S is not feasible.
    """
    p = Sigma.shape[0]
    eigs_S = np.diag(S)
    eigs_diff = np.linalg.eigh(2 * Sigma - S)[0]
    if np.min(eigs_S) < 0 or np.min(eigs_diff) < 0:
        return np.inf
    det_invG = np.log(1 / (eigs_diff + smoothing)).sum()
    det_invG = det_invG + np.log(1 / (eigs_S + smoothing)).sum()
    return det_invG
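# Illustrative check (added for exposition; not part of the original module).
# With no smoothing, the maxent loss is exactly the negative log determinant
# of the full feature-knockoff Gram matrix G, by the same block similarity
# used above. The `_demo_maxent_loss` name is hypothetical.
def _demo_maxent_loss():
    p = 4
    rho = 0.5
    Sigma = rho ** np.abs(np.subtract.outer(np.arange(p), np.arange(p)))
    S = 0.5 * np.eye(p)
    G = np.block([[Sigma, Sigma - S], [Sigma - S, Sigma]])
    assert np.isclose(maxent_loss(Sigma, S), -np.linalg.slogdet(G)[1])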
def mmi_loss(*args, **kwargs):
    """
    Computes the log determinant of the feature-knockoff precision
    matrix, which is proportional to the mutual information between X
    and its knockoffs.

    This is identical to ``maxent_loss`` and exists only for backwards
    compatibility.
    """
    return maxent_loss(*args, **kwargs)
def solve_mvr_quadratic(cn, cd, sj, i=None, min_eig=None, acc_rate=1, smoothing=0):
    """
    Solves a quadratic equation to find the optimal update for the
    MVR S-matrix based off of cn and cd. See
    https://arxiv.org/pdf/2011.14625.pdf.
    """
    # 1. Construct quadratic equation.
    # We want to minimize 1/(sj + delta) - (delta * cn)/(1 - delta * cd).
    coef2 = -1 * cn - np.power(cd, 2)
    coef1 = 2 * (-1 * cn * (sj + smoothing) + cd)
    coef0 = -1 * cn * (sj + smoothing) ** 2 - 1
    orig_options = np.roots(np.array([coef2, coef1, coef0]))

    # 2. Eliminate complex solutions
    options = np.array([delta for delta in orig_options if np.imag(delta) == 0])
    # Eliminate solutions which violate PSD-ness
    upper_bound = 1 / cd
    lower_bound = -1 * sj
    options = np.array(
        [delta for delta in options if delta < upper_bound and delta > lower_bound]
    )
    if options.shape[0] == 0:
        raise RuntimeError(
            f"All quadratic solutions ({orig_options}) were infeasible or imaginary"
        )

    # 3. If multiple solutions are left (unlikely), pick the one
    # which minimizes the loss.
    losses = 1 / (sj + options) - (options * cn) / (1 - options * cd)
    if losses[0] == losses.min():
        delta = options[0]
    else:
        delta = options[1]

    # 4. Account for rejections
    if acc_rate < 1:
        extra_space = min(min_eig, 0.05) / (i + 2)  # Helps deal with coord desc
        opt_postrej_value = sj + delta
        opt_prerej_value = opt_postrej_value / acc_rate
        opt_prerej_value = min(
            sj + upper_bound - extra_space,
            max(opt_prerej_value, extra_space),
        )
        delta = opt_prerej_value - sj

    return delta
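# Illustrative check (added for exposition; not part of the original module).
# Verifies that the closed-form root minimizes the one-coordinate objective
# f(delta) = 1/(sj + delta) - delta*cn/(1 - delta*cd) over the feasible
# interval (-sj, 1/cd). The test values (cn <= 0, cd > 0, as produced by the
# solvers below) and the `_demo_solve_mvr_quadratic` name are assumptions.
def _demo_solve_mvr_quadratic():
    cn, cd, sj = -2.0, 1.5, 0.4
    delta = solve_mvr_quadratic(cn=cn, cd=cd, sj=sj)

    def f(d):
        return 1 / (sj + d) - (d * cn) / (1 - d * cd)

    grid = np.linspace(-sj + 1e-3, 1 / cd - 1e-3, 1000)
    assert f(delta) <= f(grid).min() + 1e-8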
def solve_mvr_factored(
    D,
    U,
    tol=1e-5,
    verbose=False,
    num_iter=15,
    converge_tol=1e-4,
):
    """
    Computes S-matrix used to generate MVR knockoffs using coordinate
    descent, assuming that the covariance matrix follows a factor model.
    This means Sigma = D + UU^T for a p x p diagonal matrix D and a
    p x k matrix U.

    Parameters
    ----------
    D : np.ndarray
        ``p``-shaped array of diagonal elements.
    U : np.ndarray
        ``(p, k)``-shaped matrix. Usually k << p.
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 15.
    converge_tol : float
        A parameter specifying the criteria for convergence.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Initial constants
    p = D.shape[0]
    k = U.shape[1]
    inds = np.arange(p)
    loss = np.inf

    # TODO: this is Omega(p^3).
    # Maybe use https://www.sciencedirect.com/science/article/pii/S2215016118300062
    Sigma = np.diag(D) + np.dot(U, U.T)
    diag_Sigma = np.diag(Sigma)
    mineig = np.linalg.eigh(Sigma)[0].min()
    if mineig < 0:
        raise ValueError("D + UU^T is not PSD")

    # Initialize values
    time0 = time.time()
    decayed_improvement = 1
    Sdiag = np.zeros(p) + mineig
    # These are k x k matrices
    Q, R = sp.linalg.qr(np.eye(k) + 2 * np.dot(U.T / (2 * D - Sdiag), U))
    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Calculate parameters cd/cn
            # (see https://arxiv.org/pdf/2011.14625.pdf)
            b = np.dot(Q.T, U[j].T / (2 * D[j] - Sdiag[j]))
            x = sp.linalg.solve_triangular(R, b, lower=False)
            diff_inv_ej = -2 * np.dot(U, x) / (2 * D - Sdiag)
            diff_inv_ej[j] = diff_inv_ej[j] + 1 / (2 * D[j] - Sdiag[j])
            cd = diff_inv_ej[j]
            cn = -1 * np.power(diff_inv_ej, 2).sum()

            # 2. Find optimal update
            delta = solve_mvr_quadratic(cn=cn, cd=cd, sj=Sdiag[j])

            # 3. Rank one update to QR decomp
            muj = U[j].T / (2 * D[j] - Sdiag[j])
            c = -delta / (1 - delta / (2 * D[j] - Sdiag[j]))
            Q, R = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=-2 * c * muj,
                v=muj,
            )

            # 4. Update S
            Sdiag[j] = Sdiag[j] + delta

    return np.diag(Sdiag)
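# Illustrative usage sketch (added for exposition; not part of the original
# module). Runs the factored solver on a small synthetic factor model and
# checks feasibility of the output; the random model and `_demo_...` name
# are assumptions.
def _demo_solve_mvr_factored():
    rng = np.random.default_rng(0)
    p, k = 10, 2
    D = np.ones(p)
    U = 0.3 * rng.standard_normal((p, k))
    S = solve_mvr_factored(D, U)
    Sigma = np.diag(D) + np.dot(U, U.T)
    # The coordinate updates keep S > 0 and 2*Sigma - S PSD by construction
    assert np.diag(S).min() > 0
    assert np.linalg.eigh(2 * Sigma - S)[0].min() > -1e-8
    print("factored MVR loss:", mvr_loss(Sigma, S))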
def solve_mvr(
    Sigma,
    tol=1e-5,
    verbose=False,
    num_iter=10,
    smoothing=0,
    rej_rate=0,
    converge_tol=1,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate minimum variance-based
    reconstructability knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 10.
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical
        instability. Defaults to 0.
    rej_rate : float
        Expected proportion of rejections when the knockoff sampler may
        reject proposals; the optimal S is adjusted accordingly.
        Defaults to 0.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate
    # Takes a bit longer for rej_rate adjusted solutions to converge
    if acc_rate < 1:
        converge_tol = 1e-2

    # Initialize values
    decayed_improvement = 10
    min_eig = np.linalg.eigh(V)[0].min()
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute vn
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()

            # 2. Construct/solve quadratic equation
            delta = solve_mvr_quadratic(
                cn=cn,
                cd=cd,
                sj=S[j, j],
                min_eig=min_eig,
                i=i,
                smoothing=smoothing,
                acc_rate=acc_rate,
            )

            # 3. Update S and L
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                # S[j,j] increases, so 2V - S loses np.outer(x, x): downdate
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)

            # Set new value for S
            S[j, j] += delta

        # Check for convergence
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss - loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, "
                f"loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
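# Illustrative usage sketch (added for exposition; not part of the original
# module). Solves for the MVR S-matrix of an AR(1) correlation matrix and
# checks the guarantees the solver is supposed to provide; the AR(1) input
# and `_demo_solve_mvr` name are assumptions.
def _demo_solve_mvr():
    p = 20
    rho = 0.5
    Sigma = rho ** np.abs(np.subtract.outer(np.arange(p), np.arange(p)))
    S = solve_mvr(Sigma, choldate_warning=False)
    # The returned S is diagonal and satisfies the feasibility constraints
    assert np.allclose(S, np.diag(np.diag(S)))
    assert np.diag(S).min() > 0
    assert np.linalg.eigh(2 * Sigma - S)[0].min() > -1e-8
    assert np.isfinite(mvr_loss(Sigma, S))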
def solve_maxent_factored(
    D,
    U,
    tol=1e-5,
    verbose=False,
    num_iter=10,
    converge_tol=1e-4,
):
    """
    Computes S-matrix used to generate maximum entropy knockoffs using
    coordinate descent, assuming that the covariance matrix follows a
    factor model. This means Sigma = D + UU^T for a p x p diagonal
    matrix D and a p x k matrix U.

    Parameters
    ----------
    D : np.ndarray
        ``p``-shaped array of diagonal elements.
    U : np.ndarray
        ``(p, k)``-shaped matrix. Usually k << p.
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 10.
    converge_tol : float
        A parameter specifying the criteria for convergence.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Initial constants
    p = D.shape[0]
    k = U.shape[1]
    inds = np.arange(p)
    loss = np.inf

    # TODO: this is Omega(p^3).
    # Maybe use https://www.sciencedirect.com/science/article/pii/S2215016118300062
    Sigma = np.diag(D) + np.dot(U, U.T)
    diag_Sigma = np.diag(Sigma)
    mineig = np.linalg.eigh(Sigma)[0].min()
    if mineig < 0:
        raise ValueError("D + UU^T is not PSD")

    # Initialize values
    time0 = time.time()
    decayed_improvement = 1
    Sdiag = np.zeros(p) + mineig

    # These are all k x k matrices
    Q, R = sp.linalg.qr(np.eye(k) + 2 * np.dot(U.T / (2 * D - Sdiag), U))
    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # Qprime, Rprime are rank-one updates of Q, R
            # to help solve for the optimal Sj
            Qprime, Rprime = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=U[j].T / (2 * D[j] - Sdiag[j]),
                v=-2 * U[j].T,
                overwrite_qruv=False,
            )
            # Calculate update for S---for notation, see
            # https://arxiv.org/pdf/2006.08790.pdf
            MjUjT = np.dot(np.dot(Qprime, Rprime) - np.eye(k), U[j].T) / 2
            x = sp.linalg.solve_triangular(
                a=Rprime,
                b=np.dot(Qprime.T, MjUjT),
                lower=False,
            )
            sub_term = 4 * np.dot(U[j], MjUjT) - 8 * np.dot(MjUjT, x)
            Sjstar = (2 * diag_Sigma[j] - sub_term) / 2

            # Rank one update to QR decomp
            delta = Sdiag[j] - Sjstar
            muj = U[j].T / (2 * D[j] - Sdiag[j])
            c = delta / (1 + delta / (2 * D[j] - Sdiag[j]))
            Q, R = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=-2 * c * muj,
                v=muj,
            )

            # Update S
            Sdiag[j] = Sjstar

    # S = utilities.shift_until_PSD(S, tol=tol)
    # S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return np.diag(Sdiag)
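# Illustrative usage sketch (added for exposition; not part of the original
# module). On a small factor model, the factored solver and the generic
# dense solver below should reach nearly the same optimum of the (convex)
# maxent problem; losses are printed rather than asserted equal since the
# two solvers stop at slightly different points. Names and the random model
# are assumptions.
def _demo_solve_maxent_factored():
    rng = np.random.default_rng(0)
    p, k = 10, 2
    D = np.ones(p)
    U = 0.3 * rng.standard_normal((p, k))
    Sigma = np.diag(D) + np.dot(U, U.T)
    S_fact = solve_maxent_factored(D, U)
    S_dense = solve_maxent(Sigma, choldate_warning=False)
    # Both losses should be close
    print(maxent_loss(Sigma, S_fact), maxent_loss(Sigma, S_dense))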
def solve_maxent(
    Sigma,
    tol=1e-5,
    verbose=False,
    num_iter=10,
    converge_tol=1e-4,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate maximum entropy knockoffs
    using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 10.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values
    decayed_improvement = 1
    S = np.linalg.eigh(V)[0].min() * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula
            sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                # S[j,j] decreases, so 2V - S gains np.outer(x, x): update
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence
        prev_loss = loss
        loss = maxent_loss(V, S)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss - loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0, 3)}, "
                f"loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
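# Illustrative usage sketch (added for exposition; not part of the original
# module). Solves the maxent problem for an AR(1) correlation matrix and
# checks feasibility; the AR(1) input and `_demo_solve_maxent` name are
# assumptions.
def _demo_solve_maxent():
    p = 30
    rho = 0.7
    Sigma = rho ** np.abs(np.subtract.outer(np.arange(p), np.arange(p)))
    S = solve_maxent(Sigma, choldate_warning=False)
    assert np.diag(S).min() > 0
    assert np.linalg.eigh(2 * Sigma - S)[0].min() > -1e-8
    # For comparison: loss of a (scaled) equicorrelated baseline
    S_eq = 0.9 * min(1.0, 2 * np.linalg.eigh(Sigma)[0].min()) * np.eye(p)
    print(maxent_loss(Sigma, S), maxent_loss(Sigma, S_eq))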
def solve_mmi(*args, **kwargs):
    """
    Computes S-matrix used to generate minimum mutual information
    knockoffs.

    This is identical to ``solve_maxent`` and exists only for backwards
    compatibility.
    """
    return solve_maxent(*args, **kwargs)
def solve_ciknock(
    Sigma,
    tol=1e-5,
    num_iter=10,
):
    """
    Computes S-matrix used to generate conditional independence knockoffs.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    num_iter : int
        The number of iterations in the binary search to ensure S is
        feasible.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs

    Notes
    -----
    When the S-matrix corresponding to conditional independence knockoffs
    is not feasible, this computes that S matrix and then does a binary
    search to find the maximum gamma such that gamma * S is feasible.
    """
    # Compute vanilla S_CI
    S = 1 / (np.diag(np.linalg.inv(Sigma)))
    S = np.diag(S)
    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(Sigma, S, tol=tol, num_iter=num_iter)
    return S
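# Illustrative usage sketch (added for exposition; not part of the original
# module). The CI S-matrix sets S[j, j] = 1 / [Sigma^{-1}]_{jj}, then scales
# for feasibility; this checks the output is a feasible diagonal matrix.
# The AR(1) input and `_demo_solve_ciknock` name are assumptions.
def _demo_solve_ciknock():
    p = 15
    rho = 0.3
    Sigma = rho ** np.abs(np.subtract.outer(np.arange(p), np.arange(p)))
    S = solve_ciknock(Sigma)
    assert np.allclose(S, np.diag(np.diag(S)))
    assert np.linalg.eigh(2 * Sigma - S)[0].min() > -1e-8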