# Source code for hyperlearn.big_data.randomized


from ..linalg import lu, svd, qr, eig
from numpy import random as _random, sqrt
from numpy.linalg import norm
from ..utils import _float, _svdCond, traceXTX, eig_flip, svd_flip
from ..random import uniform

# __all__ = ['randomizedSVD', 'randomizedEig']


def randomized_projection(X, k, solver = 'lu', max_iter = 4):
    """
    [Edited 8/11/2018 Added QR Q_only parameter]
    Projects X onto some random eigenvectors, then using a special
    variant of Orthogonal Iteration, finds the closest orthogonal
    representation for X.

    Parameters
    --------------
    X:          Matrix (n, p) to project.
    k:          Number of random eigenvectors (columns of the sketch).
    solver:     'lu' (default), 'qr' or None. The orthogonalizer applied
                between power iterations. QR is most stable; None skips
                re-orthogonalization entirely.
    max_iter:   Number of power iterations, or 'auto'.

    Returns
    --------------
    Q:          Orthonormal basis (n, k) approximating the range of X.
    """
    n, p = X.shape
    if max_iter == 'auto':
        # From Modern Big Data Algorithms --> seems like <= 4 is enough.
        # Use one extra iteration when k is small relative to min(n, p).
        _min = n if n <= p else p
        max_iter = 5 if k < 0.1 * _min else 4

    # Random (p, k) test matrix drawn uniformly from [-5, 5).
    Q = uniform(-5, 5, p, int(k), X.dtype)
    XT = X.T

    # Select the per-iteration orthogonalizer. Any unrecognized solver
    # string falls through to the LU default (original behavior).
    _solver = lambda x: lu(x, L_only = True)
    if solver == 'qr':
        _solver = lambda x: qr(x, Q_only = True)
    elif solver is None:  # was `solver == None`; `is None` is the idiom
        _solver = lambda x: x

    # Power iteration: alternate X / X.T products, re-orthogonalizing each
    # step so the columns do not all collapse onto the dominant direction.
    for __ in range(max_iter):
        Q = _solver(XT @ _solver(X @ Q))

    # Final pass always uses QR so the returned basis is orthonormal.
    return qr(X @ Q, Q_only = True)
def randomizedSVD(X, n_components = 2, max_iter = 'auto', solver = 'lu', n_oversamples = 10):
    """
    [Edited 9/11/2018 Fixed SVD_flip]
    HyperLearn's Fast Randomized SVD is approx 10 - 30 % faster than
    Sklearn's implementation depending on n_components and max_iter.

    Uses NUMBA Jit accelerated functions when available, and tries to
    reduce memory overhead by chaining operations.

    Uses QR, LU or no solver to find the best SVD decomp. QR is most
    stable, but can be 2x slower than LU.

    ****n_oversamples = 10. This follows Sklearn convention to increase
    the chance of more accurate SVD.

    References
    --------------
    * Sklearn's RandomizedSVD
    * Finding structure with randomness: Stochastic algorithms for
      constructing approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis A. Szlam et al. 2014
    """
    rows, cols = X.shape
    # Work on the orientation with more rows than columns.
    transpose = (rows < cols)
    if transpose:
        X = X.T
    X = _float(X)

    # Orthonormal sketch of the (possibly transposed) input, padded with
    # extra columns for accuracy.
    basis = randomized_projection(X, n_components + n_oversamples, solver, max_iter)

    # SVD of the small projected matrix, then lift U back to full size.
    U, S, VT = svd(basis.T @ X, U_decision = transpose, transpose = True)
    U = basis @ U

    # Undo the initial transpose by swapping the singular-vector roles.
    if transpose:
        U, VT = VT.T, U.T

    # Drop the oversampled components before returning.
    return U[:, :n_components], S[:n_components], VT[:n_components, :]
def randomizedEig(X, n_components = 2, max_iter = 'auto', solver = 'lu', n_oversamples = 10):
    """
    [Edited 9/11/2018 Fixed Eig_Flip]
    HyperLearn's Randomized Eigendecomposition is an extension of
    Sklearn's randomized SVD. HyperLearn notices that the computation of
    U is not necessary, hence will use QR followed by SVD or just SVD
    depending on the situation.

    Likewise, solver = LU is default, and follows randomizedSVD

    References
    --------------
    * Sklearn's RandomizedSVD
    * Finding structure with randomness: Stochastic algorithms for
      constructing approximate matrix decompositions
      Halko, et al., 2009 http://arxiv.org/abs/arXiv:0909.4061
    * A randomized algorithm for the decomposition of matrices
      Per-Gunnar Martinsson, Vladimir Rokhlin and Mark Tygert
    * An implementation of a randomized algorithm for principal component
      analysis A. Szlam et al. 2014
    """
    rows, cols = X.shape
    # Work on the orientation with more rows than columns.
    transpose = (rows < cols)
    if transpose:
        X = X.T
    X = _float(X)

    basis = randomized_projection(X, n_components + n_oversamples, solver, max_iter)
    small = basis.T @ X

    if transpose:
        # SVD route: eigenvalues are the squared singular values, and the
        # eigenvectors are the lifted left singular vectors.
        V, S2, __ = svd(small, U_decision = transpose, transpose = True)
        V = basis @ V
        S2 **= 2
    else:
        # Direct eigendecomposition of the small projected matrix.
        S2, V = eig(small, U_decision = transpose)

    # Drop the oversampled components before returning.
    return S2[:n_components], V[:, :n_components]
def randomizedPinv(X, n_components = None, alpha = None):
    """
    [Added 6/11/2018] Implements fast randomized pseudoinverse with
    regularization. Can be used as an approximation to the matrix inverse.

    Parameters
    --------------
    X:              Matrix to pseudo-invert.
    n_components:   Rank of the approximation. If None, a heuristic of
                    approx sqrt(p) - 1 components is used.
    alpha:          Optional non-negative ridge regularizer passed to
                    the conditioned-SVD reconstruction.

    Returns
    --------------
    The (p, n) approximate pseudoinverse V * S^-1 @ U.T.
    """
    if alpha is not None:
        # Kept as assert (not ValueError) to preserve the exception type
        # existing callers may rely on.
        assert alpha >= 0
    alpha = 0 if alpha is None else alpha

    if n_components is None:
        # will provide approx sqrt(p) - 1 components.
        # A heuristic, so not guaranteed to work.
        k = int(sqrt(X.shape[1])) - 1
        if k <= 0:
            k = 1
    else:
        k = int(n_components) if n_components > 0 else 1

    X = _float(X)
    # BUG FIX: previously called randomizedSVD(X, n_components), which
    # ignored the computed k and raised TypeError (None + n_oversamples)
    # whenever n_components was left as the default None.
    U, S, VT = randomizedSVD(X, k)
    # Condition / regularize the spectrum before inverting.
    U, S, VT = _svdCond(U, S, VT, alpha)
    return VT.T * S @ U.T