Source code for hyperlearn.sparse.base



from numpy import uint8, uint16, uint32, uint64, float32, float64
from numpy import zeros, int8, int16, int32, int64, ndim
from warnings import warn as Warn
from numba import njit, prange


def getDtype(p, size, uint = True):
    """
    Computes the exact best possible data type for CSR Matrix creation.

    Parameters
    ----------
    p : int or float
        Largest value the array is expected to hold. It is padded by 25%
        (``int(1.25*p)``) before the width is chosen.
    size : int or tuple of ints
        Shape handed straight to ``zeros``.
    uint : bool, default True
        If True choose among uint8/16/32/64, otherwise among int8/16/32/64.

    Returns
    -------
    ndarray
        Zero-filled array of shape ``size`` whose dtype is the narrowest
        candidate that can represent the padded value. The 64-bit type is
        used when no narrower candidate fits.
    """
    p = int(1.25*p) # Just in case

    # The fit test is done with plain integer bounds instead of the
    # round-trip ``uint8(p) == p``: NumPy 2 raises OverflowError when an
    # out-of-range Python int is passed to a fixed-width scalar constructor,
    # whereas older versions silently wrapped. The bounds below select the
    # same dtype the wrap-and-compare test used to select.
    if uint:
        dtype = uint64
        for candidate, bits in ((uint8, 8), (uint16, 16), (uint32, 32)):
            if 0 <= p < (1 << bits):
                dtype = candidate
                break
    else:
        dtype = int64
        for candidate, bits in ((int8, 8), (int16, 16), (int32, 32)):
            half = 1 << (bits - 1)
            if -half <= p < half:
                dtype = candidate
                break
    return zeros(size, dtype = dtype)
@njit(fastmath = True, nogil = True, cache = True)
def determine_nnz(X, rowCount):
    """
    Counts the non zero entries of a 2D matrix, row by row.

    The matrix is scanned element by element with plain loops, so no
    temporary arrays are allocated. (The original author notes this is the
    difference from Scipy, which goes through nonzero().)

    Parameters
    ----------
    X : 2D array
        Matrix to scan.
    rowCount : 1D array with one slot per row of X
        Filled in place: slot i receives the number of non zeros in row i.

    Returns
    -------
    (rowCount, nnz)
        The same rowCount array that was passed in, and the total number
        of non zero entries in X.
    """
    n_rows, n_cols = X.shape
    total = 0
    for r in range(n_rows):
        row = X[r]
        found = 0
        for c in range(n_cols):
            if row[c] != 0:
                found += 1
        rowCount[r] = found
        total += found
    return rowCount, total
def create_csr(X, rowCount, nnz, temp):
    """
    [Added 10/10/2018] [Edited 13/10/2018]
    Builds the three arrays of a CSR representation of X from per-row
    non zero counts that have already been computed.

    Earlier versions kept a Boolean matrix (np bytes) and a ColIndex
    pointer (p memory); both were removed so larger matrices can be handled.

    Algorithm:
        1. Create RowIndices as the running sum of rowCount.
        2. For every row, walk its entries left to right, skipping zeros,
           and store each non zero value together with its column index.

    Takes approx O(n + np) time. Rows are independent in step 2, so the
    row loop uses prange: when compiled with parallel = True it can be
    split over threads, otherwise Numba treats prange like range.

    Parameters
    ----------
    X : 2D array
        Dense matrix to convert.
    rowCount : 1D array, length n
        Number of non zeros in each row of X. Its dtype is reused for the
        column index array.
    nnz : int
        Total number of non zeros in X (the sum of rowCount).
    temp : array
        Only its dtype is read; it is used for the row pointer array.

    Returns
    -------
    (val, colPointer, rowIndices)
        val        : the nnz non zero values, in row-major order.
        colPointer : the column index of each entry of val.
        rowIndices : n+1 offsets; row i occupies
                     val[rowIndices[i]:rowIndices[i+1]].
    """
    n = X.shape[0]
    val = zeros(nnz, dtype = X.dtype)
    rowIndices = zeros(n+1, dtype = temp.dtype)
    colPointer = zeros(nnz, dtype = rowCount.dtype)

    # Running offset is kept as a builtin int. rowCount usually has a narrow
    # unsigned dtype; adding its scalars directly would, when this function
    # is run un-jitted on NumPy 2, make the accumulator that narrow type too
    # and wrap around.
    offset = 0
    for i in range(n):
        rowIndices[i] = offset
        offset += int(rowCount[i])
    rowIndices[n] = nnz

    for i in prange(n):
        Xi = X[i]
        left = rowIndices[i]
        right = rowIndices[i+1]

        # Column cursor for this row only. The slots [left, right) hold
        # exactly rowCount[i] entries, so the skip loop below always finds
        # a non zero before running off the end of the row.
        col = 0
        for j in range(left, right):
            while Xi[col] == 0:
                col += 1
            val[j] = Xi[col]
            colPointer[j] = col
            col += 1

    return val, colPointer, rowIndices
# Two compiled flavours of create_csr, built from the same Python function.
# Serial build: compiled result is cached on disk (cache = True).
create_csr_cache = njit(fastmath = True, nogil = True, cache = True)(create_csr)
# Threaded build: parallel = True lets the prange row loop run across threads.
create_csr_parallel = njit(fastmath = True, nogil = True, parallel = True)(create_csr)
def CreateCSR(X, n_jobs = 1):
    """
    [Added 10/10/2018] [Edited 13/10/2018]
    Converts a dense 2D matrix into CSR form.

    Index arrays use the narrowest unsigned integer type that fits, since
    indices are always >= 0; this is where the memory saving comes from.
    The conversion can run in parallel through Numba (see n_jobs).

    A warning is issued when more than 80% of the entries are non zero
    (that is, fewer than 20% are zeros): such a matrix gains little from
    being stored sparsely. The conversion still goes ahead.

    Parameters
    ----------
    X : 2D array
        Dense matrix to convert.
    n_jobs : int, default 1
        1 selects the serial, disk-cached compiled routine. Any other
        value selects the parallel compiled routine.

    Returns
    -------
    (val, colPointer, rowIndices)
        The non zero values, their column indices, and the n+1 row
        offsets, as returned by create_csr.
    """
    n,p = X.shape
    rowCount = getDtype(p, n)
    rowCount, nnz = determine_nnz(X, rowCount)

    # An empty matrix (n == 0 or p == 0) has no density to speak of; skip
    # the check instead of dividing by zero.
    total = n*p
    if total > 0 and nnz/total > 0.8:
        Warn("Created sparse matrix has just under 20% zeros. Not a good idea to sparsify the matrix.")

    temp = getDtype(nnz, 1)

    f = create_csr_cache if n_jobs == 1 else create_csr_parallel
    return f(X, rowCount, nnz, temp)