# Source code for hyperlearn.big_data.truncated


from scipy.sparse import linalg as sparse
from numpy import finfo
from ..utils import *
from ..random import uniform

# Names exported by a star-import of this module. truncatedPinv is defined
# further down in this file but is not part of this list.
__all__ = ['truncatedEigh', 'truncatedSVD', 'truncatedEig']


def truncatedEigh(XTX, n_components = 2, tol = None, svd = False, which = 'largest'):
    """
    [Edited 6/11/2018 Added smallest / largest command]
    Truncated eigendecomposition of a square Hermitian (positive
    definite) matrix. Two components are computed by default, and the
    returned eigenvalues are ordered LARGEST first.

    Parameters
    --------------
    XTX:            Square matrix to decompose (asserted n == p).
    n_components:   Number of eigenpairs requested (cast to int).
    tol:            ARPACK tolerance. Defaults to finfo(dtype).resolution,
                    i.e. the resolution of the dtype and NOT its eps.
    svd:            If True, negative eigenvalues are set to 0, the square
                    root is taken, and VT (V transposed) is returned
                    instead of V.
    which:          'largest' for the largest eigenvalues; anything else
                    requests the smallest ones through shift-invert mode
                    (sigma = 0).

    Returns
    --------------
    (S2, V) when svd is False, otherwise (S2 ** 0.5, V.T).

    Speed
    --------------
    Uses ARPACK from Scipy to compute the truncated decomposition. To make
    it slightly more stable and faster, the start vector follows Sklearn's
    random initialization drawn from -1 -> 1.
    Might switch to SPECTRA in the future.

    Stability
    --------------
    eig_flip is called to flip the eigenvector signs for deterministic
    output.
    """
    n_components = int(n_components)
    n, p = XTX.shape
    dtype = XTX.dtype
    assert n == p

    if tol is None:
        tol = finfo(dtype).resolution

    # Random start vector of length min(n, p), drawn from [-1, 1].
    start = uniform(-1, 1, min(n, p), dtype = dtype)

    arpack_args = dict(k = n_components, tol = tol, v0 = start)
    if which != 'largest':
        # Shift-invert mode around 0 retrieves the smallest eigenvalues.
        arpack_args['sigma'] = 0
    eigvals, eigvecs = sparse.eigsh(XTX, **arpack_args)

    eigvecs = eig_flip(eigvecs)

    # ARPACK returns eigenvalues SMALLEST to LARGEST, so reverse both the
    # values and the matching eigenvector columns.
    eigvals = eigvals[::-1]
    eigvecs = eigvecs[:, ::-1]

    if not svd:
        return eigvals, eigvecs

    # SVD convention: clip negatives, take the root, hand back VT.
    eigvals[eigvals < 0] = 0.0
    eigvals **= 0.5
    return eigvals, eigvecs.T
def truncatedEig(X, n_components = 2, tol = None, svd = False, which = 'largest'):
    """
    [Added 6/11/2018]
    Truncated eigendecomposition for any matrix X.

    When memoryXTX(X) reports that it is possible, _XTX(X.T) is formed and
    handed to truncatedEigh. Otherwise truncatedSVD is applied to X
    directly and its output is converted to eigenvalues / eigenvectors.

    Parameters
    --------------
    X:              Input matrix.
    n_components:   Number of components requested.
    tol:            Tolerance forwarded to the underlying solver.
    svd:            If True, results are returned in SVD form (S, VT)
                    rather than eigen form (S ** 2, V).
    which:          'largest' or smallest components; forwarded as is.

    Returns
    --------------
    A pair (values, vectors) in the form selected by `svd`.
    """
    if not memoryXTX(X):
        # Fallback: work on X itself with the truncated SVD.
        # NOTE(review): the squaring / transpose below is assumed to
        # belong to this fallback path only -- confirm against upstream,
        # the original indentation was not available.
        _, values, vectors = truncatedSVD(X, n_components, tol, which = which)
        if svd:
            return values, vectors
        values **= 2
        vectors = vectors.T
        return values, vectors

    gram = _XTX(X.T)
    values, vectors = truncatedEigh(gram, n_components, tol, which = which, svd = svd)
    return values, vectors
def truncatedSVD(X, n_components = 2, tol = None, transpose = True, U_decision = False, which = 'largest'):
    """
    [Edited 6/11/2018 Added which command - can get largest or smallest
    eigen components]
    Truncated SVD of any matrix. Two components are computed by default,
    and the singular values are returned LARGEST first.

    Parameters
    --------------
    X:              Matrix to decompose.
    n_components:   Number of singular triplets requested (cast to int).
    tol:            ARPACK tolerance. Defaults to finfo(dtype).resolution,
                    i.e. the resolution of the dtype and NOT its eps.
    transpose:      If True and p > n, svd(X.T) is computed instead
                    (faster) and the factors are swapped back on return.
    U_decision:     Sign convention passed to svd_flip. The default uses
                    a VT based decision; set to True for the Sklearn
                    (U based) convention.
    which:          'largest' maps to ARPACK 'LM', anything else to 'SM'.

    Returns
    --------------
    U, S, VT

    Speed
    --------------
    Uses ARPACK from Scipy to compute the truncated decomposition. To make
    it slightly more stable and faster, the start vector follows Sklearn's
    random initialization drawn from -1 -> 1.
    Might switch to SPECTRA in the future.

    Stability
    --------------
    svd_flip is called to flip the signs for deterministic output.
    """
    n_components = int(n_components)
    dtype = X.dtype
    n, p = X.shape

    # Only actually transpose when it was requested AND X is wide.
    use_transpose = True if (transpose and p > n) else False
    if use_transpose:
        X = X.T
        # U and VT swap roles for X.T, so the sign decision swaps too.
        U_decision = not U_decision

    if tol is None:
        tol = finfo(dtype).resolution

    # Random start vector of length min(n, p), drawn from [-1, 1].
    start = uniform(-1, 1, min(n, p), dtype = dtype)
    arpack_which = 'LM' if which == 'largest' else 'SM'

    U, S, VT = sparse.svds(X, k = n_components, tol = tol, v0 = start, which = arpack_which)

    # ARPACK returns singular values SMALLEST to LARGEST, so reverse the
    # values together with the matching columns of U and rows of VT.
    U = U[:, ::-1]
    S = S[::-1]
    VT = VT[::-1]

    U, VT = svd_flip(U, VT, U_decision = U_decision)

    # Undo the transpose: svd(X.T) = (U, S, VT)  =>  svd(X) = (VT.T, S, U.T)
    return (VT.T, S, U.T) if use_transpose else (U, S, VT)
def truncatedPinv(X, n_components = None, alpha = None):
    """
    [Added 6/11/2018]
    Fast truncated pseudoinverse with regularization, built on top of
    truncatedSVD. Can be used as an approximation to the matrix inverse.

    Parameters
    --------------
    X:              Matrix to (pseudo)invert.
    n_components:   Number of singular triplets to keep. If None, roughly
                    sqrt(p) - 1 components are used (p = X.shape[1]),
                    with a minimum of 1. This is a heuristic, so it is
                    not guaranteed to work. Values that are not positive
                    (or that truncate to 0) fall back to 1.
    alpha:          Non-negative regularization strength; None means 0.

    Returns
    --------------
    (VT.T * S) @ U.T, using the factors returned by _svdCond.

    Raises
    --------------
    AssertionError if alpha is given and negative.
    """
    if alpha is not None:
        assert alpha >= 0
    alpha = 0 if alpha is None else alpha

    if n_components is None:
        # Heuristic: approx sqrt(p) - 1 components, never fewer than 1.
        k = max(int(sqrt(X.shape[1])) - 1, 1)
    else:
        # Clamp to at least one component (also covers 0 < n_components < 1,
        # which would otherwise truncate to 0).
        k = max(int(n_components), 1)

    X = _float(X)

    # Use the resolved component count k. Passing n_components here would
    # ignore the heuristic above and fail on the default n_components = None.
    U, S, VT = truncatedSVD(X, k)
    # NOTE(review): _svdCond presumably applies the alpha regularization and
    # conditions / inverts S -- confirm against its definition.
    U, S, VT = _svdCond(U, S, VT, alpha)

    return VT.T * S @ U.T