Source code for knockpy.smatrix

import numpy as np
import scipy.cluster.hierarchy as hierarchy
from . import mrc
from . import mac
from . import dgp
from . import utilities
from . import constants

# These methods don't require approximation
NON_APPROX_METHODS = ["equicorrelated", "eq", "ci", "ciknock"]

[docs]def parse_method(method, groups, p=None): """ Decides which method to use to create the knockoff S matrix """ if method is not None: return method if groups is None: return "mvr" if p is None: p = groups.shape[0] if np.all(groups == np.arange(1, p + 1, 1)): return "mvr" else: return "sdp" return method
def divide_computation(Sigma, max_block): """ Approximates Sigma as a block-diagonal matrix. Parameters ---------- Sigma : np.ndarray ``(p, p)``-shaped covariance matrix of X max_size : int Maximum size of a block in the block-diagonal approximation. Returns ------- blocks : np.ndarray ``(p, )``-shaped numpy array where ``blocks[i] == j`` indicates that variable ``i`` belongs to block ``j``. """
[docs]def divide_computation(Sigma, max_block): """ Approximates a correlation matrix Sigma as a block-diagonal matrix using hierarchical clustering. Roughly follows the R knockoff package. """ # Correlation tree. We add noise to deal with highly structured Sigma. p = Sigma.shape[0] noise = np.random.randn(p, p) * 1e-6 noise += noise.T Sigma = Sigma + noise link = dgp.create_correlation_tree(Sigma) # Set up binary search max_clusters = p min_clusters = 1 prev_max_clusters = p prev_min_clusters = 1 # Binary search to create clusters for j in range(100): # Create new groups and check maximum size n_clusters = int((max_clusters + min_clusters) / 2) groups = hierarchy.cut_tree(link, n_clusters).reshape(-1) + 1 current_max_block = utilities.calc_group_sizes(groups).max() # Cache search info and check maximum size prev_max_clusters = max_clusters prev_min_clusters = min_clusters if current_max_block > max_block: min_clusters = n_clusters else: max_clusters = n_clusters # Break if nothing has changed between iterations if min_clusters == prev_min_clusters and max_clusters == prev_max_clusters: if current_max_block > max_block: groups = hierarchy.cut_tree(link, n_clusters + 1).reshape(-1) + 1 break return merge_groups(groups, max_block)
[docs]def merge_groups(groups, max_block): """ Merges groups of variables together while ensuring all new groups have size less than max_block. """ p = groups.shape[0] new_groups = np.zeros(p) # Loop through groups and concatenate them together # until we exceed max size current_size = 0 new_group_id = 1 old_group_ids = np.unique(groups) np.random.shuffle(old_group_ids) # Iterate through old groups for old_group_id in old_group_ids: flag = groups == old_group_id old_group_size = flag.sum() # Either count old group as part of new group if current_size + old_group_size <= max_block: current_size += old_group_size # Or add a new group elif old_group_size <= max_block: current_size = old_group_size new_group_id += 1 else: raise ValueError( f"Group {old_group_id} has size {old_group_size}, exceeding max_block {max_block}" ) new_groups[flag] = new_group_id return new_groups
[docs]def compute_smatrix_factored( Sigma, D=None, U=None, method='mvr', num_factors=20, **kwargs ): """ Wraps S-matrix generation functions which approximate ``Sigma = np.diag(D) + np.dot(U, U.T)``. Parameters ---------- Sigma : np.ndarray ``(p, p)``-shaped covariance matrix of X D : np.ndarray ``p``-shaped array of diagonal elements for factor model. Only used if how_approx='factor'. This is optional if Sigma is not None. U : np.ndarray ``(p, k)``-shaped matrix for factor model. Usually k << p. Only used if how_approx='factor'. This is optional if Sigma is not None. method : str Method for constructing S-matrix. One of mvr, maxent, mmi. num_factors : int The number of factors if how_approx='factor'. Defaults to 20. kwargs : dict kwargs to pass to one of the wrapped S-matrix solvers. Notes ----- mmi stands for minimum mutual information, which is the same as maximizing entropy. Returns ------- S : np.ndarray ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs """ # Compute factor model if not inputted if D is None or U is None: raise NotImplementedError("TODO") p = D.shape[0] k = U.shape[1] # Solve using specialized methods if method == 'mvr': S = mrc.solve_mvr_factored(D=D, U=U, **kwargs) elif method == 'maxent' or method == 'mmi': S = mrc.solve_maxent_factored(D=D, U=U, **kwargs) else: raise NotImplementedError(f"Factor model solver is not implemented for method={method}") # If Sigma is provided, ensure exact validity tol = kwargs.get('tol', constants.DEFAULT_TOL) S = utilities.shift_until_PSD(S, tol=tol) if Sigma is not None: S, _ = utilities.scale_until_PSD(Sigma, S, tol=tol, num_iter=10) return S
[docs]def compute_smatrix( Sigma, groups=None, method=None, solver="cd", how_approx='blockdiag', max_block=1000, num_factors=20, num_processes=1, D=None, U=None, **kwargs, ): """ Wraps a variety of S-matrix generation functions. For mvr, maxent, mmi, and sdp methods, this can use a block-diagonal approximation of Sigma if the dimension of Sigma exceeds max_block or a factor approximation. Parameters ---------- Sigma : np.ndarray ``(p, p)``-shaped covariance matrix of X groups : np.ndarray For group knockoffs, a p-length array of integers from 1 to num_groups such that ``groups[j] == i`` indicates that variable `j` is a member of group `i`. Defaults to ``None`` (regular knockoffs). method : str Method for constructing S-matrix. One of mvr, maxent, mmi, sdp, equicorrelated, ci. solver : str Method for solving mrc knockoffs. One of 'cd' (coordinate descent) or 'psgd' (projected gradient descent). Coordinate descent is highly recommended. how_approx : str How to approximate the covariance matrix to speed up computation. - If 'blockdiag', approximates Sigma as a block-diagonal matrix. - If 'factor', approximates ``Sigma = np.diag(D) + np.dot(U, U.T)``, a factor model. max_block : int The maximum size of a block if how_approx='blockdiag'. Defaults to 1000. num_factors : int The number of factors if how_approx='factor'. Defaults to 20. num_processes : int Number of parallel process to use if Sigma is approximated as a block-diagonal matrix. D : np.ndarray ``p``-shaped array of diagonal elements for factor model. Only used if how_approx='factor'. This is optional if Sigma is not None. U : np.ndarray ``(p, k)``-shaped matrix for factor model. Usually k << p. Only used if how_approx='factor'. This is optional if Sigma is not None. kwargs : dict kwargs to pass to one of the wrapped S-matrix solvers. Notes ----- mmi stands for minimum mutual information, which is the same as maximizing entropy. Returns ------- S : np.ndarray ``(p, p)``-shaped (block) diagonal matrix used to generate knockoffs """ # If S in kwargs, just return S (important # for chaining methods in metro sampling) kwargs = kwargs.copy() if "S" in kwargs: if kwargs["S"] is not None: return kwargs["S"] else: kwargs.pop("S") # Parse method if method is not None: method = str(method).lower() method = parse_method(method, groups, None) # These two are the same if method == 'mmi': method = 'maxent' # Factor-models go to a specialized helper if how_approx == 'factor': if method in NON_APPROX_METHODS: pass else: return compute_smatrix_factored( Sigma=Sigma, D=D, U=U, method=method, num_factors=num_factors, **kwargs ) # Initial params p = Sigma.shape[0] if groups is None: groups = np.arange(1, p + 1, 1) # Scale to correlation matrix scale = np.sqrt(np.diag(Sigma)) scale_matrix = np.outer(scale, scale) Sigma = Sigma / scale_matrix # Possibly use block-diagonal approximation, either using # hierarchical clustering for non-grouped knockoffs or # randomly merging groups for group knockoffs. if p > max_block and method not in NON_APPROX_METHODS: if np.all(groups == np.arange(1, p + 1, 1)): blocks = divide_computation(Sigma, max_block) else: blocks = merge_groups(groups, max_block) block_sizes = utilities.calc_group_sizes(blocks) nblocks = block_sizes.shape[0] print( f"Using blockdiag approx. with nblocks={nblocks} and max_size={block_sizes.max()}..." ) Sigma_blocks = utilities.blockdiag_to_blocks(Sigma, blocks) group_blocks = [] for j in range(int(blocks.min()), int(blocks.max()) + 1): group_blocks.append(utilities.preprocess_groups(groups[blocks == j])) # Recursive subcall for each block. Possibly use multiprocessing. constant_inputs = { "method": method, "solver": solver, "max_block": p, } for key in kwargs: constant_inputs[key] = kwargs[key] S_blocks = utilities.apply_pool( func=compute_smatrix, constant_inputs=constant_inputs, Sigma=Sigma_blocks, groups=group_blocks, num_processes=num_processes, ) print("Finished comp of blocks, putting together") # Put blocks together S = np.zeros((p, p)) block_id = 1 for Sigma_block, S_block in zip(Sigma_blocks, S_blocks): block_inds = np.where(blocks == block_id)[0] block_inds = np.ix_(block_inds, block_inds) S[block_inds] = S_block block_id += 1 # Make S feasible S, _ = utilities.scale_until_PSD( Sigma=Sigma, S=S, tol=kwargs.get("tol", constants.DEFAULT_TOL), num_iter=kwargs.get("num_iter", 10), ) # Line search for MRC methods smoothing = kwargs.get("smoothing", 0) if method == "mvr": loss_fn = mrc.mvr_loss elif method == "maxent": loss_fn = mrc.maxent_loss best_gamma = 1 if method in ["mvr", "maxent"]: best_loss = loss_fn(Sigma=Sigma, S=S, smoothing=smoothing) for gamma in np.arange(20)/10: loss = loss_fn(Sigma=Sigma, S=gamma * S, smoothing=smoothing) if loss < best_loss: best_gamma = gamma best_loss = loss return S * best_gamma * scale_matrix # Currently cd solvers cannot handle group knockoffs # (this is todo) if not np.all(groups == np.arange(1, p + 1, 1)): solver = "psgd" if (method == "mvr" or method == "maxent") and solver == "psgd": # Check for imports utilities.check_kpytorch_available(purpose="group MRC knockoffs OR PSGD solver") from .kpytorch import mrcgrad S = mrcgrad.solve_mrc_psgd(Sigma=Sigma, groups=groups, method=method, **kwargs) elif method == "mvr": S = mrc.solve_mvr(Sigma=Sigma, **kwargs) elif method == "maxent": S = mrc.solve_maxent(Sigma=Sigma, **kwargs) elif method == "sdp" or method == "asdp": S = mac.solve_group_SDP(Sigma, groups, **kwargs,) elif method == "ciknock" or method == "ci": S = mrc.solve_ciknock(Sigma, **kwargs) elif method == "equicorrelated" or method == "eq": S = mac.solve_equicorrelated(Sigma, groups, **kwargs) else: raise ValueError(f"Unrecognized method {method}") # Rescale and return return S * scale_matrix