from scipy.sparse import linalg as sparse
from numpy import finfo
from ..utils import *
from ..random import uniform
__all__ = ['truncatedEigh', 'truncatedSVD', 'truncatedEig']
[docs]def truncatedEigh(XTX, n_components = 2, tol = None, svd = False, which = 'largest'):
"""
[Edited 6/11/2018 Added smallest / largest command]
Computes the Truncated Eigendecomposition of a Hermitian Matrix
(positive definite). K = 2 for default.
Return format is LARGEST eigenvalue first.
If SVD is True, then outputs S2**0.5 and sets negative S2 to 0
and outputs VT and not V.
Speed
--------------
Uses ARPACK from Scipy to compute the truncated decomp. Note that
to make it slightly more stable and faster, follows Sklearn's
random intialization from -1 -> 1.
Also note tolerance is resolution(X), and NOT eps(X)
Might switch to SPECTRA in the future.
Stability
--------------
EIGH FLIP is called to flip the eigenvector signs for deterministic
output.
"""
n_components = int(n_components)
n,p = XTX.shape
dtype = XTX.dtype
assert n == p
if tol is None: tol = finfo(dtype).resolution
size = n if p >= n else p # min(n,p)
v = uniform(-1, 1, size, dtype = dtype)
if which == 'largest':
S2, V = sparse.eigsh(XTX, k = n_components, tol = tol, v0 = v)
else:
# Uses shift invert mode to get smallest
S2, V = sparse.eigsh(XTX, k = n_components, tol = tol, v0 = v, sigma = 0)
V = eig_flip(V)
# Note ARPACK provides SMALLEST to LARGEST S2. Hence, reverse.
S2, V = S2[::-1], V[:,::-1]
if svd:
S2[S2 < 0] = 0.0
S2 **= 0.5
return S2, V.T
return S2, V
[docs]def truncatedEig(X, n_components = 2, tol = None, svd = False, which = 'largest'):
"""
[Added 6/11/2018]
Computes truncated eigendecomposition given any matrix X. Directly
uses TruncatedSVD if memory is not enough, and returns eigen vectors/values.
Also argument for smallest eigen components are provided.
"""
if memoryXTX(X):
covariance = _XTX(X.T)
S, VT = truncatedEigh(covariance, n_components, tol, which = which, svd = svd)
else:
__, S, VT = truncatedSVD(X, n_components, tol, which = which)
if svd:
return S, VT
S **= 2
VT = VT.T
return S, VT
[docs]def truncatedSVD(X, n_components = 2, tol = None, transpose = True, U_decision = False, which = 'largest'):
"""
[Edited 6/11/2018 Added which command - can get largest or smallest eigen components]
Computes the Truncated SVD of any matrix. K = 2 for default.
Return format is LARGEST singular first first.
Speed
--------------
Uses ARPACK from Scipy to compute the truncated decomp. Note that
to make it slightly more stable and faster, follows Sklearn's
random intialization from -1 -> 1.
Also note tolerance is resolution(X), and NOT eps(X). Also note
TRANSPOSE is True. This means instead of computing svd(X) if p > n,
then computing svd(X.T) is faster, but you must output VT.T, S, U.T
Might switch to SPECTRA in the future.
Stability
--------------
SVD FLIP is called to flip the VT signs for deterministic
output. Note uses VT based decision and not U based decision.
U_decision can be changed to TRUE for Sklearn convention
"""
n_components = int(n_components)
dtype = X.dtype
n, p = X.shape
transpose = True if (transpose and p > n) else False
if transpose:
X, U_decision = X.T, not U_decision
if tol is None: tol = finfo(dtype).resolution
size = n if p >= n else p # min(n,p)
v = uniform(-1, 1, size, dtype = dtype)
which = 'LM' if which == 'largest' else 'SM'
U, S, VT = sparse.svds(X, k = n_components, tol = tol, v0 = v, which = which)
# Note ARPACK provides SMALLEST to LARGEST S. Hence, reverse.
U, S, VT = U[:, ::-1], S[::-1], VT[::-1]
U, VT = svd_flip(U, VT, U_decision = U_decision)
if transpose:
return VT.T, S, U.T
return U, S, VT
def truncatedPinv(X, n_components = None, alpha = None):
"""
[Added 6/11/2018]
Implements fast truncated pseudoinverse with regularization.
Can be used as an approximation to the matrix inverse.
"""
if alpha != None: assert alpha >= 0
alpha = 0 if alpha is None else alpha
if n_components == None:
# will provide approx sqrt(p) - 1 components.
# A heuristic, so not guaranteed to work.
k = int(sqrt(X.shape[1]))-1
if k <= 0: k = 1
else:
k = int(n_components) if n_components > 0 else 1
X = _float(X)
U, S, VT = truncatedSVD(X, n_components)
U, S, VT = _svdCond(U, S, VT, alpha)
return VT.T * S @ U.T