Source code for knockpy.mrc

""" Methods for minimum-reconstructability knockoffs."""

import warnings
import time
import numpy as np
import scipy as sp
from scipy import stats
from . import utilities, constants, mac
try:
    import choldate
    CHOLDATE_AVAILABLE = True
except:
    CHOLDATE_AVAILABLE = False

def cholupdate(R, x, add=True):
    """
    Performs a rank-one update (or downdate) of an upper-triangular
    cholesky factor ``R``, in place.

    Parameters
    ----------
    R : np.ndarray
        A ``(p,p)``-shaped upper-triangular matrix.
    x : np.ndarray
        A ``(p,)``-shaped vector.
    add : bool
        If True, performs a rank one update; else performs a
        rank one downdate.

    Returns
    -------
    R : np.ndarray
        Suppose the parameter R was a cholesky factor of a matrix V.
        Upon return, R is the cholesky factor of ``V +/- np.outer(x, x)``.

    Notes
    -----
    - This function modifies both ``R`` and ``x`` in place.
    - The ``choldate`` package is a much faster and more numerically
      stable alternative.
    """
    p = np.size(x)
    x = x.T
    sign = 1.0 if add else -1.0
    for k in range(p):
        # Givens-style elimination of x[k] into the kth diagonal entry
        diag = R[k, k]
        r = np.sqrt(diag ** 2 + sign * x[k] ** 2)
        c = r / diag
        s = x[k] / diag
        R[k, k] = r
        # Rotate the remainder of row k, then fold R's new row back into x
        R[k, k + 1:p] = (R[k, k + 1:p] + sign * s * x[k + 1:p]) / c
        x[k + 1:p] = c * x[k + 1:p] - s * R[k, k + 1:p]
    return R
### MVR
def mvr_loss(Sigma, S, smoothing=0):
    """
    Computes minimum variance-based reconstructability
    loss for knockoffs, i.e., the trace of the feature-knockoff
    precision matrix.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    S : np.ndarray
        ``(p, p)``-shaped S-matrix used to generate knockoffs
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical
        instability. Defaults to 0.

    Returns
    -------
    loss : float
        The MVR loss for Sigma and S. This is infinite if S is not feasible.
    """
    # The spectrum of the feature-knockoff Gram matrix factors into the
    # eigenvalues of 2*Sigma - S plus the diagonal of S (S diagonal here).
    S_eigs = np.diag(S)
    diff_eigs = np.linalg.eigh(2 * Sigma - S)[0]
    # A negative eigenvalue in either part means S is infeasible
    if np.min(S_eigs) < 0 or np.min(diff_eigs) < 0:
        return np.inf
    # Trace of the inverse = sum of reciprocal (smoothed) eigenvalues
    return (1 / (diff_eigs + smoothing)).sum() + (1 / (S_eigs + smoothing)).sum()
def _solve_mvr_quadratic(cn, cd, sj, i=None, min_eig=None, acc_rate=1, smoothing=0): """ Solves a quadratic equation to find the optimal updates for the MVR S-matrix based off of cn and cd. See https://arxiv.org/pdf/2011.14625.pdf """ # 1. Construct quadratic equation # We want to minimize 1/(sj + delta) - (delta * cn)/(1 - delta * cd) coef2 = -1 * cn - np.power(cd, 2) coef1 = 2 * (-1 * cn * (sj + smoothing) + cd) coef0 = -1 * cn * (sj + smoothing) ** 2 - 1 orig_options = np.roots(np.array([coef2, coef1, coef0])) # 2. Eliminate complex solutions options = np.array([delta for delta in orig_options if np.imag(delta) == 0]) # Eliminate solutions which violate PSD-ness upper_bound = 1 / cd lower_bound = -1 * sj options = np.array( [ delta for delta in options if delta < upper_bound and delta > lower_bound ] ) if options.shape[0] == 0: raise RuntimeError( f"All quadratic solutions ({orig_options}) were infeasible or imaginary" ) # 3. If multiple solutions left (unlikely), pick the smaller one losses = 1 / (sj + options) - (options * cn) / (1 - options * cd) if losses[0] == losses.min(): delta = options[0] else: delta = options[1] # 4. Account for rejections if acc_rate < 1: extra_space = min(min_eig, 0.05) / (i + 2) # Helps deal with coord desc opt_postrej_value = sj + delta opt_prerej_value = opt_postrej_value / (acc_rate) opt_prerej_value = min( sj + upper_bound - extra_space, max(opt_prerej_value, extra_space), ) delta = opt_prerej_value - sj return delta
def solve_mvr_factored(
    D,
    U,
    tol=1e-5,
    verbose=False,
    num_iter=15,
    converge_tol=1e-4,
):
    """
    Computes S-matrix used to generate mvr knockoffs
    using coordinate descent assuming that the covariance
    matrix follows a factor model.
    This means Sigma = D + UU^T for a p x p diagonal matrix
    D and a p x k matrix U.

    Parameters
    ----------
    D : np.ndarray
        ``p``-shaped array of diagonal elements.
    U : np.ndarray
        ``(p, k)``-shaped matrix. Usually k << p.
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 15.
    converge_tol : float
        A parameter specifying the criteria for convergence.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Initial constants
    p = D.shape[0]
    k = U.shape[1]
    inds = np.arange(p)

    # TODO: this is Omega(p^3).
    # Maybe use https://www.sciencedirect.com/science/article/pii/S2215016118300062
    Sigma = np.diag(D) + np.dot(U, U.T)
    mineig = utilities.calc_mineig(Sigma)
    if mineig < 0:
        raise ValueError("D + UU^T is not PSD")

    # Initialize S = mineig * I, which is always feasible
    Sdiag = np.zeros(p) + mineig

    # QR decomposition of the k x k matrix I + 2 U^T (2D - S)^{-1} U,
    # maintained across coordinate updates via rank-one QR updates.
    Q, R = sp.linalg.qr(np.eye(k) + 2*np.dot(U.T / (2*D - Sdiag), U))
    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Calculate parameters cd/cn
            # (see https://arxiv.org/pdf/2011.14625.pdf)
            b = np.dot(Q.T, U[j].T/(2*D[j] - Sdiag[j]))
            x = sp.linalg.solve_triangular(R, b, lower=False)
            # jth column of (2 Sigma - S)^{-1} via the Woodbury identity
            diff_inv_ej = -2*np.dot(U, x)/(2*D - Sdiag)
            diff_inv_ej[j] = diff_inv_ej[j] + 1/(2*D[j] - Sdiag[j])
            cd = diff_inv_ej[j]
            cn = -1 * np.power(diff_inv_ej, 2).sum()
            # 2. Find optimal update
            delta = _solve_mvr_quadratic(cn=cn, cd=cd, sj=Sdiag[j])
            # 3. Rank one update to QR decomp
            muj = U[j].T / (2*D[j] - Sdiag[j])
            c = -delta/(1 - delta/(2*D[j] - Sdiag[j]))
            Q, R = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=-2*c*muj,
                v=muj,
            )
            # 4. Update S
            Sdiag[j] = Sdiag[j] + delta
    return np.diag(Sdiag)
def _solve_mvr_ungrouped(
    Sigma,
    verbose=False,
    num_iter=50,
    smoothing=0,
    rej_rate=0,
    converge_tol=1e-2,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate minimum variance-based
    reconstructability knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff precision
        matrix before inverting to avoid numerical
        instability. Defaults to 0.
    rej_rate : float
        Expected proportion of rejections during sampling; the optimal
        S-values are inflated to compensate for it.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warn once if the fast C cholesky-update package is missing; the pure
    # python fallback (``cholupdate``) is much slower.
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf
    acc_rate = 1 - rej_rate

    # Initialize S = min_eig * I (always feasible) and maintain a running
    # lower-triangular cholesky factor L of 2*Sigma - S across updates.
    decayed_improvement = 10
    min_eig = utilities.calc_mineig(V)
    S = min_eig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S + smoothing * np.eye(p))

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # 1. Compute coefficients cn and cd, which parametrize the
            # one-dimensional objective in S[j, j].
            ej = np.zeros(p)  # jth basis element
            ej[j] = 1
            # 1a. Compute cd = e_j^T (2 Sigma - S)^{-1} e_j via one
            # triangular solve against L.
            vd = sp.linalg.solve_triangular(a=L, b=ej, lower=True)
            cd = np.power(vd, 2).sum()
            # 1b. Compute cn = -||(2 Sigma - S)^{-1} e_j||^2 via a second
            # triangular solve against L^T.
            vn = sp.linalg.solve_triangular(a=L.T, b=vd, lower=False)
            cn = -1 * np.power(vn, 2).sum()
            # 2. Construct/solve quadratic equation for the optimal update
            delta = _solve_mvr_quadratic(
                cn=cn,
                cd=cd,
                sj=S[j,j],
                min_eig=min_eig,
                i=i,
                smoothing=smoothing,
                acc_rate=acc_rate,
            )
            # 3. Update S and L. Increasing S[j, j] by delta decreases
            # 2*Sigma - S, so delta > 0 is a cholesky *downdate* of L and
            # delta < 0 is an *update*.
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    cholupdate(L.T, x, add=False)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    cholupdate(L.T, x, add=True)
            # Set new value for S
            S[j, j] += delta

        # Check for convergence: an exponentially-decayed average of
        # per-iteration loss improvements must fall below converge_tol.
        prev_loss = loss
        loss = mvr_loss(V, acc_rate * S, smoothing=smoothing)
        if i != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss - loss) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break
    return S

### Group MVR

def _solve_mvr_quadratic_group(cn_diff, cd_diff, cn_S, cd_S):
    """
    Internal helper function for group mvr. Useful for diagonal elements.

    Finds the optimal update to a diagonal entry of S given the
    coefficients (cn, cd) of both 2*Sigma - S and the S-block.
    """
    # 1. Construct and solve quadratic equation
    coef0 = cn_diff - cn_S
    coef1 = 2*(cd_S * cn_diff + cn_S * cd_diff)
    coef2 = cn_diff * np.power(cd_S, 2) - cn_S * np.power(cd_diff, 2)
    orig_options = np.roots(np.array([coef2, coef1, coef0]))
    # 2. Eliminate complex solutions and solutions which violate PSD-ness
    options = np.array([delta for delta in orig_options if np.imag(delta) == 0])
    upper_bound = 1 / cd_diff  # Ensures 2 Sigma - S is PSD
    lower_bound = -1 / cd_S  # Ensures S is PSD
    options = options[(options < upper_bound) & (options > lower_bound)]
    if options.shape[0] == 0:
        raise RuntimeError(
            f"All quadratic solutions ({orig_options}) were infeasible or imaginary"
        )
    elif options.shape[0] > 1:
        raise RuntimeError(
            f"Multiple feasible solutions (options={options}), need the lower bound"
        )
    return options[0]

def _mvr_group_contrib(Q, R, i, j):
    """
    Internal helper function for group mvr. Useful for off-diagonal elements.

    Parameters
    ----------
    Q, R : QR decomposition
    i, j: indices for rank-2 update

    Returns
    -------
    f : function
        f(delta) equals Tr((QR + delta e_i e_j^T + delta e_j e_i^T)^{-1})
        up to a constant not depending on delta.
    min_delta : int
        Minimum value of delta for which the prior matrix is PSD.
    max_delta : int
        Maximum value of delta for which the prior matrix is PSD.
    """
    # Notational note: W = (QR)^{-1}
    # Wj / Wi are rows/columns of W obtained by triangular solves.
    Wj = sp.linalg.solve_triangular(a=R, b=Q[j], lower=False)
    Wi = sp.linalg.solve_triangular(a=R, b=Q[i], lower=False)
    def objective(delta):
        # Closed-form change in Tr of the inverse after the symmetric
        # rank-2 perturbation (Sherman-Morrison-Woodbury style formula).
        num = 2 * np.dot(Wj, Wi)*(1 + delta * Wj[i])
        num -= delta * Wj[j] * np.dot(Wi, Wi)
        num -= delta * Wi[i] * np.dot(Wj, Wj)
        denom = np.power(1 + delta * Wj[i], 2) - np.power(delta, 2) * Wi[i] * Wj[j]
        return delta * num/denom
    # Feasible range of delta from the eigenvalues of the 2x2 capacitance
    # matrix B.
    # NOTE(review): B[1, 1] is Wi[j], not Wj[i]; these only agree when W is
    # symmetric (true when QR factors a symmetric matrix) -- confirm.
    # NOTE(review): I is unused below.
    I = np.eye(2)
    B = np.array([
        [Wi[j], Wj[j]],
        [Wi[i], Wi[j]]
    ])
    eigs_B = np.linalg.eig(B)[0]
    # Negative eigenvalues bound delta from above; positives from below.
    # The 1e-5 margins keep the perturbed matrices strictly PD.
    max_delta = -1 * (eigs_B[eigs_B < 0])
    if max_delta.shape[0] == 0:
        max_delta = np.inf
    else:
        max_delta = (1/max_delta).max() - 1e-5
    min_delta = -1 * (eigs_B[eigs_B > 0])
    if min_delta.shape[0] == 0:
        min_delta = -np.inf
    else:
        min_delta = (1/min_delta).max() + 1e-5
    return objective, min_delta, max_delta

def _solve_cn_cd(Q, R, j):
    """
    Solves for cn/cd using QR decomp. This is specialized
    for diagonal elements for group knockoffs.
    See https://arxiv.org/abs/2011.14625.
    """
    # Notational note: W = (QR)^{-1}
    Wj = sp.linalg.solve_triangular(a=R, b=Q[j], lower=False)
    cd = Wj[j]  # cd = W[j, j]
    cn = np.power(Wj, 2).sum()  # cn = ||W e_j||^2
    return cn, cd

def _solve_mvr_grouped(
    Sigma,
    groups=None,
    tol=1e-5,
    verbose=False,
    num_iter=20,
    converge_tol=1e-3,
    smoothing=0,
    rej_rate=0,
):
    """
    Computes S-matrix for minimum variance-based
    reconstructability group knockoffs.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    groups : np.ndarray
        For group knockoffs, a p-length array of integers from 1 to
        num_groups such that ``groups[j] == i`` indicates that variable `j`
        is a member of group `i`. Defaults to ``None`` (regular knockoffs).
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 20.
    converge_tol : float
        A parameter specifying the criteria for convergence.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Todo
    if smoothing != 0:
        raise NotImplementedError("Smoothing not implemented for group MVR")
    if rej_rate != 0:
        raise NotImplementedError("Rej rate adjustment not implemented for group MVR")

    # group_id maps to group_indices
    block_inds = {}
    for group_id in np.unique(groups):
        block_inds[group_id] = np.where(groups == group_id)[0]

    # Each nonzero element of S: ((global i, j), (within-block i, j), group)
    # Only the lower triangle (j <= i) of each block is enumerated.
    S_elems = []
    for g_id in block_inds:
        for i_within, i in enumerate(block_inds[g_id]):
            for j_within, j in enumerate(block_inds[g_id]):
                if j > i:
                    break
                S_elems.append(((i, j), (i_within, j_within), g_id))

    # Precompute the standard basis vectors of R^p
    p = Sigma.shape[0]
    basis = []
    for j in range(p):
        ej = np.zeros(p)
        ej[j] = 1
        basis.append(ej)

    # Initial constants
    time0 = time.time()
    loss = np.inf
    acc_rate = 1 - rej_rate

    # Initialize with half the equicorrelated solution (always feasible)
    # and maintain a running QR decomposition of 2*Sigma - S.
    decayed_improvement = 10
    min_eig = utilities.calc_mineig(Sigma)
    S = mac.solve_equicorrelated(Sigma, groups)/2
    Q, R = np.linalg.qr(2 * Sigma - S + smoothing * np.eye(p))

    # Running QR decompositions of blocks of S
    Sblock_QR = {}
    for g_id in block_inds:
        blocksize = block_inds[g_id].shape[0]  # NOTE(review): unused
        Sblock = S[block_inds[g_id]][:, block_inds[g_id]]
        # Easy QR decomp for diagonal matrix
        Sblock_QR[g_id] = np.linalg.qr(Sblock)

    for it in range(num_iter):
        np.random.shuffle(S_elems)
        # i, j are the coordinates within L
        # i_within, j_within are the coordinates within S
        for (i, j), (i_within, j_within), g_id in S_elems:
            # Retrieve QR decomposition for this block of S
            Q_S, R_S = Sblock_QR[g_id]
            # Basis elements within the block
            d = Q_S.shape[0]
            ei = np.zeros(d)
            ei[i_within] = 1
            ej = np.zeros(d)
            ej[j_within] = 1
            # For off-diagonal elements: rank-2 update
            if i != j:
                # 1. Find optimal delta by 1-d numerical minimization over
                # the intersection of the two feasible intervals.
                loss_diff, lb_diff, ub_diff = _mvr_group_contrib(Q, R, i, j)
                # Adding delta to S subtracts delta from 2*Sigma - S,
                # hence the sign flip of these bounds.
                max_delta_diff = -1 * lb_diff
                min_delta_diff = -1 * ub_diff
                loss_S, min_delta_S, max_delta_S = _mvr_group_contrib(
                    Q_S, R_S, i=i_within, j=j_within
                )
                lower_bound = np.maximum(min_delta_diff, min_delta_S)
                upper_bound = np.minimum(max_delta_diff, max_delta_S)
                if lower_bound > upper_bound:
                    raise RuntimeError(f"No feasible solutions: lower bound {lower_bound} > upper_bound {upper_bound} in coord descent")
                coord_loss = lambda delta: -loss_S(delta) - loss_diff(-1*delta)
                delta = sp.optimize.fminbound(
                    coord_loss, lower_bound, upper_bound
                )
                # Clip in case fminbound steps marginally out of bounds
                delta = np.maximum(np.minimum(upper_bound, delta), lower_bound)
                # 2. Update QR decomp for 2 Sigma - S (symmetric rank-2)
                U = np.array([basis[i], basis[j]]).T
                V = np.array([basis[j], basis[i]]).T
                Q, R = sp.linalg.qr_update(
                    Q=Q, R=R, u=-1*delta*U, v=V
                )
                # 3. Update QR decomp for S
                U = np.array([ei, ej]).T
                V = np.array([ej, ei]).T
                Sblock_QR[g_id] = sp.linalg.qr_update(
                    Q=Q_S, R=R_S, u=delta*U, v=V
                )
                # 4. Update S (both symmetric entries)
                S[i, j] += delta
                S[j, i] += delta
            # For diagonal elements, rank-1 update
            else:
                # 1. Compute coefficients cn and cd for 2 Sigma - S
                cn_diff, cd_diff = _solve_cn_cd(Q, R, j)
                # 2. Compute coefficients cn and cd for S
                cn_S, cd_S = _solve_cn_cd(
                    Q=Q_S, R=R_S, j=j_within
                )
                # 2. Construct/solve quadratic equation
                delta = _solve_mvr_quadratic_group(
                    cn_diff=cn_diff,
                    cd_diff=cd_diff,
                    cn_S=cn_S,
                    cd_S=cd_S,
                )
                # 3. Update QR decmp for 2Sigma - S
                Q, R = sp.linalg.qr_update(
                    Q=Q, R=R,
                    u=basis[i],
                    v=-1*delta*basis[j],
                )
                # 4. Update QR decomp for Sblock
                Sblock_QR[g_id] = sp.linalg.qr_update(
                    Q=Q_S, R=R_S,
                    u=ei,
                    v=delta*ej
                )
                # 5. Set new value for S
                S[i, j] += delta

        # Check for convergence: exponentially-decayed average improvement
        prev_loss = loss
        loss = mvr_loss(Sigma, acc_rate * S, smoothing=smoothing)
        if it != 0:
            decayed_improvement = decayed_improvement / 10 + 9 * (prev_loss - loss) / 10
        if verbose:
            print(
                f"After iter {it} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol and decayed_improvement > 0:
            if verbose:
                print(f"Converged after iteration {it} with loss={loss}")
            break
    return S
def solve_mvr(
    Sigma,
    groups=None,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    smoothing=0,
    rej_rate=0,
    converge_tol=1e-3,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate minimum variance-based
    reconstructability knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    groups : np.ndarray
        For group knockoffs, a p-length array of integers from 1 to
        num_groups such that ``groups[j] == i`` indicates that variable `j`
        is a member of group `i`. Defaults to ``None`` (regular knockoffs).
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff
        precision matrix before inverting to avoid numerical
        instability. Defaults to 0.
    rej_rate : float
        Expected proportion of rejections during sampling.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True and groups is None, will warn the user if choldate
        is not installed. Defaults to True.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    p = Sigma.shape[0]
    # Default: every feature forms its own (trivial) group
    trivial_groups = np.arange(1, p + 1)
    if groups is None:
        groups = trivial_groups
    groups = utilities.preprocess_groups(groups)

    # Dispatch to the specialized ungrouped solver when possible, as it
    # uses much faster cholesky rank-one updates.
    if np.all(groups == trivial_groups):
        S = _solve_mvr_ungrouped(
            Sigma=Sigma,
            verbose=verbose,
            num_iter=num_iter,
            smoothing=smoothing,
            rej_rate=rej_rate,
            converge_tol=converge_tol,
            choldate_warning=choldate_warning,
        )
    else:
        S = _solve_mvr_grouped(
            Sigma=Sigma,
            groups=groups,
            tol=tol,
            verbose=verbose,
            num_iter=num_iter,
            converge_tol=converge_tol,
            smoothing=smoothing,
            rej_rate=rej_rate,
        )

    # Project back so that both S and 2*Sigma - S stay PSD
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(Sigma, S, tol=tol, num_iter=10)
    return S
### Maxent/MMI
def maxent_loss(Sigma, S, smoothing=0):
    """
    Computes the log determinant of the feature-knockoff precision
    matrix, which is proportional to the negative entropy of [X, tilde{X}].

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    S : np.ndarray
        ``(p, p)``-shaped S-matrix used to generate knockoffs
    smoothing : float
        Add ``smoothing`` to all eigenvalues of the feature-knockoff precision
        matrix before taking the log determinant
        to avoid numerical instability. Defaults to 0.

    Returns
    -------
    loss : float
        The maxent loss for Sigma and S. This is infinite if S is not feasible.
    """
    # det(G) = det(2*Sigma - S) * det(S); diag(S) gives the eigenvalues
    # of S for diagonal S.
    eigs_S = np.diag(S)
    eigs_diff = np.linalg.eigh(2 * Sigma - S)[0]
    # Infeasible S-matrices get infinite loss
    if np.min(eigs_S) < 0 or np.min(eigs_diff) < 0:
        return np.inf
    det_invG = np.log(1/(eigs_diff + smoothing)).sum()
    det_invG = det_invG + np.log(1/(eigs_S + smoothing)).sum()
    return det_invG
def mmi_loss(*args, **kwargs):
    """
    Computes the log determinant of the feature-knockoff precision
    matrix, which is proportional to the mutual information between
    X and knockoffs.

    This is identical to ``maxent_loss`` and exists only for
    backwards compatibility.
    """
    return maxent_loss(*args, **kwargs)
def solve_maxent_factored(
    D,
    U,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
):
    """
    Computes S-matrix used to generate maximum entropy
    knockoffs using coordinate descent assuming that the
    covariance matrix follows a factor model.
    This means Sigma = D + UU^T for a p x p diagonal matrix
    D and a p x k matrix U.

    Parameters
    ----------
    D : np.ndarray
        ``p``-shaped array of diagonal elements.
    U : np.ndarray
        ``(p, k)``-shaped matrix. Usually k << p.
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Thin wrapper: the shared maxent/SDP factor-model solver does the
    # work; solve_sdp=False selects the maximum-entropy objective.
    return _solve_maxent_sdp_factored(
        D=D,
        U=U,
        solve_sdp=False,
        tol=tol,
        verbose=verbose,
        num_iter=num_iter,
        converge_tol=converge_tol
    )
def _solve_maxent_sdp_factored(
    D,
    U,
    solve_sdp,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
    mu=0.9,
    lambd=0.5,
):
    """
    Internal function for maxent/sdp s-matrix using factor approximation.

    Parameters
    ----------
    D : np.ndarray
        ``p``-shaped array of diagonal elements.
    U : np.ndarray
        ``(p, k)``-shaped matrix. Usually k << p.
    solve_sdp : bool
        If True, solve for SDP knockoffs. Else, solve for
        maximum entropy knockoffs.
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    mu : float
        Barrier decay parameter
    lambd : float
        Initial barrier parameter

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Initial constants
    p = D.shape[0]
    k = U.shape[1]
    inds = np.arange(p)
    # NOTE(review): loss, time0 and decayed_improvement are set but never
    # used -- there is no convergence check in this solver; it always runs
    # for num_iter iterations.
    loss = np.inf

    # TODO: this is Omega(p^3).
    # Maybe use https://www.sciencedirect.com/science/article/pii/S2215016118300062
    Sigma = np.diag(D) + np.dot(U, U.T)
    diag_Sigma = np.diag(Sigma)
    mineig = utilities.calc_mineig(Sigma)
    if mineig < 0:
        raise ValueError("D + UU^T is not PSD")

    # Initialize values. SDP starts from a small feasible S; maxent starts
    # from mineig * I.
    time0 = time.time()
    decayed_improvement = 1
    if solve_sdp:
        Sdiag = np.zeros(p) + 0.01 * mineig
    else:
        Sdiag = np.zeros(p) + mineig
    # Keep the initial barrier parameter feasible
    lambd = min(2*mineig, lambd)

    # These are all k x k matrices: QR decomposition of the Woodbury
    # capacitance matrix I + 2 U^T (2D - S)^{-1} U.
    Q, R = sp.linalg.qr(np.eye(k) + 2*np.dot(U.T / (2*D - Sdiag), U))
    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            # Qprime, Rprime are rank-one updates of Q, R
            # to help solve for the optimal Sj
            Qprime, Rprime = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=U[j].T/(2*D[j] - Sdiag[j]),
                v=-2*U[j].T,
                overwrite_qruv=False
            )
            # Calculate update for S---for notation, see
            # https://arxiv.org/pdf/2006.08790.pdf
            MjUjT = np.dot(np.dot(Qprime, Rprime) - np.eye(k), U[j].T)/2
            x = sp.linalg.solve_triangular(
                a=Rprime,
                b=np.dot(Qprime.T, MjUjT),
                lower=False
            )
            sub_term = 4*np.dot(U[j], MjUjT) - 8*np.dot(MjUjT, x)
            # SDP clamps the coordinate-wise optimum to [0, 1] and
            # subtracts the barrier; maxent takes the analytic optimum.
            if solve_sdp:
                Sjstar = max(min(1, 2*diag_Sigma[j] - sub_term - lambd), 0)
            else:
                Sjstar = (2*diag_Sigma[j] - sub_term)/2
            # Rank one update to QR decomp
            delta = Sdiag[j] - Sjstar
            muj = U[j].T / (2*D[j] - Sdiag[j])
            denom = 1 + delta/(2*D[j] - Sdiag[j])
            c = delta/denom
            Q, R = sp.linalg.qr_update(
                Q=Q,
                R=R,
                u=-2*c*muj,
                v=muj,
            )
            # Update S
            Sdiag[j] = Sjstar
        # Decay the barrier parameter each iteration when solving SDP
        if solve_sdp:
            lambd = mu * lambd
    return np.diag(Sdiag)
def solve_maxent(
    Sigma,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
    choldate_warning=True,
):
    """
    Computes S-matrix used to generate maximum entropy
    knockoffs using coordinate descent.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Thin wrapper: the shared maxent/SDP coordinate-descent solver does
    # the work; solve_sdp=False selects the maximum-entropy objective.
    return _solve_maxent_sdp_cd(
        Sigma=Sigma,
        tol=tol,
        verbose=verbose,
        num_iter=num_iter,
        converge_tol=converge_tol,
        choldate_warning=choldate_warning,
        solve_sdp=False,
    )
def _solve_maxent_sdp_cd(
    Sigma,
    solve_sdp,
    tol=1e-5,
    verbose=False,
    num_iter=50,
    converge_tol=1e-4,
    choldate_warning=True,
    mu=0.9,
    lambd=0.5,
):
    """
    This function is internally used to compute the S-matrices
    used to generate maximum entropy and SDP knockoffs. Users
    should not call this function---they should call ``solve_maxent``
    or ``solve_sdp`` directly.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    verbose : bool
        If True, prints updates during optimization.
    num_iter : int
        The number of coordinate descent iterations. Defaults to 50.
    converge_tol : float
        A parameter specifying the criteria for convergence.
    choldate_warning : bool
        If True, will warn the user if choldate is not installed.
        Defaults to True
    solve_sdp : bool
        If True, will solve SDP. Otherwise, will solve maxent formulation.
    lambd : float
        Initial barrier constant
    mu : float
        Barrier decay constant

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs
    """
    # Warning if choldate not available
    if not CHOLDATE_AVAILABLE and choldate_warning:
        warnings.warn(constants.CHOLDATE_WARNING)

    # Initial constants
    time0 = time.time()
    V = Sigma  # Shorthand prevents lines from spilling over
    p = V.shape[0]
    inds = np.arange(p)
    loss = np.inf

    # Initialize values. SDP starts from a small feasible S; maxent starts
    # from mineig * I. L is a running lower-triangular cholesky factor of
    # 2*V - S, maintained by rank-one updates.
    decayed_improvement = 1
    mineig = utilities.calc_mineig(V)
    if solve_sdp:
        S = 0.01 * mineig * np.eye(p)
    else:
        S = mineig * np.eye(p)
    L = np.linalg.cholesky(2 * V - S)
    # Keep the initial barrier parameter feasible
    lambd = min(2 * mineig, lambd)

    # Loss function
    if solve_sdp:
        loss_fn = lambda V, S: S.shape[0] - np.diag(S).sum()
    else:
        loss_fn = maxent_loss

    for i in range(num_iter):
        np.random.shuffle(inds)
        for j in inds:
            diff = 2 * V - S

            # Solve cholesky equation
            tildey = 2 * V[j].copy()
            tildey[j] = 0
            x = sp.linalg.solve_triangular(a=L, b=tildey, lower=True)

            # Use cholesky eq to get new update
            zeta = diff[j, j]
            x22 = np.power(x, 2).sum()
            qinvterm = zeta * x22 / (zeta + x22)

            # Inverse of Qj using SWM formula. SDP clamps the optimum to
            # [0, 1] and subtracts the barrier; maxent is analytic.
            if solve_sdp:
                sjstar = max(min(1, 2 * V[j, j] - qinvterm - lambd), 0)
            else:
                sjstar = (2 * V[j, j] - qinvterm) / 2

            # Rank one update for cholesky. Note delta = old - new, so
            # delta > 0 means S[j, j] shrinks and 2*V - S gains
            # delta * e_j e_j^T: a rank-one *update* of L. delta < 0 is
            # the corresponding *downdate*.
            delta = S[j, j] - sjstar
            x = np.zeros(p)
            x[j] = np.sqrt(np.abs(delta))
            if delta > 0:
                if CHOLDATE_AVAILABLE:
                    choldate.cholupdate(L.T, x)
                else:
                    # BUGFIX: the pure-python fallback previously performed
                    # a downdate (add=False) here, the opposite of the
                    # choldate branch above and of the delta semantics.
                    cholupdate(L.T, x, add=True)
            else:
                if CHOLDATE_AVAILABLE:
                    choldate.choldowndate(L.T, x)
                else:
                    # BUGFIX: previously add=True (an update) here;
                    # a downdate matches choldate.choldowndate.
                    cholupdate(L.T, x, add=False)

            # Set new value for S
            S[j, j] = sjstar

        # Check for convergence: exponentially-decayed average improvement
        prev_loss = loss
        loss = loss_fn(V, S)
        if i != 0:
            loss_diff = prev_loss - loss
            # For SDP, the barrier guarantees at least lambd of progress
            if solve_sdp:
                loss_diff = max(loss_diff, lambd)
            decayed_improvement = decayed_improvement / 10 + 9 * (loss_diff) / 10
        if verbose:
            print(
                f"After iter {i} at time {np.around(time.time() - time0,3)}, loss={loss}, decayed_improvement={decayed_improvement}"
            )
        if decayed_improvement < converge_tol:
            if verbose:
                print(f"Converged after iteration {i} with loss={loss}")
            break

        # Update barrier parameter if solving SDP
        if solve_sdp:
            lambd = mu * lambd

    # Ensure validity of solution
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(V, S, tol=tol, num_iter=10)
    return S
def solve_mmi(*args, **kwargs):
    """
    Computes S-matrix used to generate minimum mutual information
    knockoffs.

    This is identical to ``solve_maxent`` and exists only for
    backwards compatibility.
    """
    return solve_maxent(*args, **kwargs)
def solve_ciknock(
    Sigma,
    tol=1e-5,
    num_iter=10,
):
    """
    Computes S-matrix used to generate conditional independence
    knockoffs.

    Parameters
    ----------
    Sigma : np.ndarray
        ``(p, p)``-shaped covariance matrix of X
    tol : float
        Minimum permissible eigenvalue of 2Sigma - S and S.
    num_iter : int
        The number of iterations in the binary search to ensure
        S is feasible.

    Returns
    -------
    S : np.ndarray
        ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs

    Notes
    -----
    When the S-matrix corresponding to conditional independence knockoffs
    is not feasible, this computes that S matrix and then does a binary
    search to find the maximum gamma such that gamma * S is feasible.
    """
    # Vanilla conditional-independence S: reciprocal of the diagonal of
    # the precision matrix, placed on a diagonal matrix.
    precision_diag = np.diag(np.linalg.inv(Sigma))
    S = np.diag(1 / precision_diag)
    # Project back so that both S and 2*Sigma - S stay PSD
    S = utilities.shift_until_PSD(S, tol=tol)
    S, _ = utilities.scale_until_PSD(Sigma, S, tol=tol, num_iter=num_iter)
    return S