Source code for hyperlearn.sparse.csr


from numba import njit, prange
from numpy import zeros, sum as _sum, array, hstack, searchsorted, ndim
from .base import getDtype

"""
CSR Matrix functions.
	1. sum [sum_A, sum_0, sum_1]
	2. mean [mean_A, mean_0, mean_1]
	3. mult [mult_A, mult_0, mult_1]
	4. div [div_A, div_0, div_1]
	5. add [add_A, add_0, add_1]
	6. min [min_A, min_0, min_1]
	7. max [max_A, max_0, max_1]
	8. diagonal [diagonal, diagonal_add]
	9. element [get_element]
	10. matmul [mat_vec, matT_vec, mat_mat]
	11. _XXT
	12. rowSum
"""


@njit(fastmath = True, nogil = True, cache = True)
def sum_A(val, colPointer, rowIndices, n, p):
	# Sum over all stored (nonzero) values.
	S = 0
	for i in range(len(val)):
		S += val[i]
	return S


@njit(fastmath = True, nogil = True, cache = True)
def sum_0(val, colPointer, rowIndices, n, p):
	# Column sums (axis = 0).
	S = zeros(p, dtype = val.dtype)
	for i in range(len(val)):
		S[colPointer[i]] += val[i]
	return S


@njit(fastmath = True, nogil = True, cache = True)
def sum_1(val, colPointer, rowIndices, n, p):
	# Row sums (axis = 1).
	S = zeros(n, dtype = val.dtype)
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		S[i] += _sum(val[left:right])
	return S

@njit(fastmath = True, nogil = True, cache = True)
def mean_A(val, colPointer, rowIndices, n, p):
	# Mean over the stored (nonzero) values only.
	S = sum_A(val, colPointer, rowIndices, n, p)
	return S/len(val)

@njit(fastmath = True, nogil = True, cache = True)
def mean_0(val, colPointer, rowIndices, n, p):
	# Column means: column sums divided by n (implicit zeros included).
	A = zeros(p, dtype = val.dtype)
	nnz = len(val)
	for i in range(nnz):
		j = colPointer[i]
		A[j] += val[i]
	for i in range(p):
		if A[i] != 0:	# only divide columns with a nonzero sum
			A[i] /= n
	return A


@njit(fastmath = True, nogil = True, cache = True)
def mean_1(val, colPointer, rowIndices, n, p):
	# Row means: row sums divided by p (implicit zeros included).
	A = zeros(n, dtype = val.dtype)
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		A[i] += _sum(val[left:right])
		if A[i] != 0:	# only divide rows with a nonzero sum
			A[i] /= p
	return A

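# Usage sketch (illustrative, not part of the module): checks the sum / mean
# reductions against SciPy's dense equivalents. Assumes SciPy is installed;
# the helper name _demo_reductions is hypothetical.
def _demo_reductions():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(6, 4, density = 0.5, format = "csr", random_state = 0)
	n, p = X.shape
	args = (X.data, X.indices, X.indptr, n, p)
	assert np.isclose(sum_A(*args), X.sum())
	assert np.allclose(sum_0(*args), np.asarray(X.sum(axis = 0)).ravel())
	assert np.allclose(sum_1(*args), np.asarray(X.sum(axis = 1)).ravel())
	# mean_A averages over stored values only, not over n*p entries.
	assert np.isclose(mean_A(*args), X.data.mean())
	assert np.allclose(mean_0(*args), np.asarray(X.mean(axis = 0)).ravel())
	assert np.allclose(mean_1(*args), np.asarray(X.mean(axis = 1)).ravel())
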
@njit(fastmath = True, nogil = True, cache = True)
def add_A(val, rowIndices, addon, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		V[i] += addon
	return V, addon


@njit(fastmath = True, nogil = True, cache = True)
def add_0(val, colPointer, addon, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		j = colPointer[i]
		V[i] += addon[j]
	return V, addon


@njit(fastmath = True, nogil = True, cache = True)
def add_1(val, rowIndices, addon, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		for j in range(left, right):
			V[j] += addon[i]
	return V, addon

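# Usage sketch (illustrative): add_0 adds a per-column vector to the stored
# values only; implicit zeros are untouched, unlike a dense X + addon.
# Assumes SciPy is installed; _demo_add is a hypothetical helper name.
def _demo_add():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(5, 3, density = 0.5, format = "csr", random_state = 1)
	n, p = X.shape
	addon = np.arange(p, dtype = X.data.dtype)
	V, _ = add_0(X.data, X.indices, addon, n, p, copy = True)
	# Each stored value shifted by its column's addon; the pattern is unchanged.
	assert np.allclose(V, X.data + addon[X.indices])
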
@njit(fastmath = True, nogil = True, cache = True)
def div_A(val, colPointer, rowIndices, divisor, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		V[i] /= divisor
	return V


@njit(fastmath = True, nogil = True, cache = True)
def div_0(val, colPointer, rowIndices, divisor, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		j = colPointer[i]
		V[i] /= divisor[j]
	return V


@njit(fastmath = True, nogil = True, cache = True)
def div_1(val, colPointer, rowIndices, divisor, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		for j in range(left, right):
			V[j] /= divisor[i]
	return V

@njit(fastmath = True, nogil = True, cache = True)
def mult_A(val, colPointer, rowIndices, mult, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		V[i] *= mult
	return V


@njit(fastmath = True, nogil = True, cache = True)
def mult_0(val, colPointer, rowIndices, mult, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(len(val)):
		j = colPointer[i]
		V[i] *= mult[j]
	return V


@njit(fastmath = True, nogil = True, cache = True)
def mult_1(val, colPointer, rowIndices, mult, n, p, copy = True):
	V = val.copy() if copy else val
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		for j in range(left, right):
			V[j] *= mult[i]
	return V

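# Usage sketch (illustrative): mult_0 scales each stored value by its
# column's factor, i.e. the stored values of X @ diag(mult); div_1 similarly
# divides each row i by divisor[i]. Assumes SciPy is installed; _demo_scale
# is a hypothetical helper name.
def _demo_scale():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(5, 3, density = 0.5, format = "csr", random_state = 2)
	n, p = X.shape
	mult = np.arange(1, p+1, dtype = X.data.dtype)
	V = mult_0(X.data, X.indices, X.indptr, mult, n, p, copy = True)
	assert np.allclose(V, X.data * mult[X.indices])
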
@njit(fastmath = True, nogil = True, cache = True)
def min_A(val, colPointer, rowIndices, n, p):
	# Minimum over the whole matrix; starts at 0 so implicit zeros count.
	M = 0
	for i in range(len(val)):
		v = val[i]
		if v < M:
			M = v
	return M


@njit(fastmath = True, nogil = True, cache = True)
def min_0(val, colPointer, rowIndices, n, p):
	M = zeros(p, dtype = val.dtype)
	for i in range(len(val)):
		v = val[i]
		coli = colPointer[i]
		if v < M[coli]:
			M[coli] = v
	return M


@njit(fastmath = True, nogil = True, cache = True)
def min_1(val, colPointer, rowIndices, n, p):
	M = zeros(n, dtype = val.dtype)
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		Mi = M[i]
		for j in val[left:right]:
			if j < Mi:
				Mi = j
		M[i] = Mi
	return M


@njit(fastmath = True, nogil = True, cache = True)
def max_A(val, colPointer, rowIndices, n, p):
	# Maximum over the whole matrix; starts at 0 so implicit zeros count.
	M = 0
	for i in range(len(val)):
		v = val[i]
		if v > M:
			M = v
	return M


@njit(fastmath = True, nogil = True, cache = True)
def max_0(val, colPointer, rowIndices, n, p):
	M = zeros(p, dtype = val.dtype)
	for i in range(len(val)):
		v = val[i]
		coli = colPointer[i]
		if v > M[coli]:
			M[coli] = v
	return M


@njit(fastmath = True, nogil = True, cache = True)
def max_1(val, colPointer, rowIndices, n, p):
	M = zeros(n, dtype = val.dtype)
	for i in range(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		Mi = M[i]
		for j in val[left:right]:
			if j > Mi:
				Mi = j
		M[i] = Mi
	return M

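# Usage sketch (illustrative): min_1 / max_1 start from 0, so implicit zeros
# participate in the result (unlike reducing over stored values only).
# Assumes SciPy is installed; _demo_minmax is a hypothetical helper name.
def _demo_minmax():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(5, 4, density = 0.5, format = "csr", random_state = 9)
	X.data -= 0.5	# mix signs so the minimum is informative
	n, p = X.shape
	lo = min_1(X.data, X.indices, X.indptr, n, p)
	hi = max_1(X.data, X.indices, X.indptr, n, p)
	D = X.toarray()
	assert np.allclose(lo, np.minimum(D.min(axis = 1), 0))
	assert np.allclose(hi, np.maximum(D.max(axis = 1), 0))
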
@njit(fastmath = True, nogil = True, cache = True)
def diagonal(val, colPointer, rowIndices, n, p):
	"""
	[Added 10/10/2018] [Edited 13/10/2018]
	Extracts the diagonal elements of a CSR matrix. Note: only the square
	diagonal is extracted (no off-diagonals). HyperLearn's algorithm is
	faster than Scipy's, as it uses binary search while Scipy uses linear
	search.

	d = min(n, p)
	HyperLearn = O(d log p)
	Scipy      = O(dp)
	"""
	size = min(n, p)
	diag = zeros(size, dtype = val.dtype)
	for i in range(size):
		left = rowIndices[i]
		right = rowIndices[i+1]
		partial = colPointer[left:right]
		k = searchsorted(partial, i)
		# Keep only true diagonal elements, else leave 0.
		if k < len(partial) and partial[k] == i:
			diag[i] = val[left+k]
	return diag

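# Usage sketch (illustrative): the binary-search extraction agrees with
# SciPy's X.diagonal(). Assumes SciPy is installed; _demo_diagonal is a
# hypothetical helper name.
def _demo_diagonal():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(6, 4, density = 0.5, format = "csr", random_state = 3)
	n, p = X.shape
	d = diagonal(X.data, X.indices, X.indptr, n, p)
	assert np.allclose(d, X.diagonal())
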
@njit(fastmath = True, nogil = True, cache = True)
def _diagonal_add(val, colPointer, rowIndices, n, p, minimum, addon, copy = True):
	"""
	[Added 10/10/2018] [Edited 13/10/2018]
	HyperLearn's add-diagonal for CSR matrices is optimised and uses binary
	search. The algorithm is executed in 4 distinct steps:

		1. Initialise a K vector (a little like Dijkstra's algorithm) used
		   to log where each diagonal element is.
		2. Use binary search to find the diagonal element. Scipy's is a
		   linear search, which is slower.
		3. Change the Val, ColPointer vectors.
		4. Change the RowIndices vector.

	Steps 1->4 are also mostly optimised in time complexity. With
	d = min(n, p):

		1. O(d)   2. O(d log p)   3. O(2dp)   4. O(n)

	The magic is step 2, where Scipy is O(dp). Total complexity is
	O(d + n + d log p + dp).
	"""
	size = min(n, p)
	A = addon.astype(val.dtype)
	V = val
	C = colPointer
	R = rowIndices.copy() if copy else rowIndices
	extend = 0

	# Step 1: K[i] = -1 means "not yet located".
	K = zeros(size, dtype = minimum.dtype)
	for i in range(size):
		K[i] -= 1

	# Step 2: binary search for each diagonal element.
	for i in range(size):
		left = R[i]
		right = R[i+1]
		partial = C[left:right]
		k = searchsorted(partial, i)
		if len(partial) != k:
			if partial[k] == i:
				K[i] = -k-1	# store as -(k+1): diagonal already present
			else:
				K[i] = k	# diagonal missing: insert at position k
				extend += 1
		else:
			K[i] = k
			extend += 1

	# Step 3: rebuild Val and ColPointer with the inserted diagonal elements.
	newN = len(C)+extend
	newC = zeros(newN, dtype = C.dtype)
	newV = zeros(newN, dtype = V.dtype)
	added = zeros(n, dtype = minimum.dtype)
	move = 0	# how many elements have been inserted so far (left --> right)

	for i in range(size):
		a = A[i]
		k = K[i]
		left = R[i]
		right = R[i+1]
		l = left+move
		r = right+move
		m = l+k
		lk = left+k
		added[i] = move
		if k > -1:
			# Insert a brand new diagonal element at position k.
			newC[l:m] = C[left:lk]
			newC[m] = i
			newC[m+1:r+1] = C[lk:right]
			newV[l:m] = V[left:lk]
			newV[m] = a
			newV[m+1:r+1] = V[lk:right]
			move += 1
		else:
			# Diagonal already exists: k was stored as -(k+1), so
			# l-k-1 == left+move+k recovers its position in the new arrays.
			newC[l:r] = C[left:right]
			newV[l:r] = V[left:right]
			newV[l-k-1] += a

	# Copy the rest of the matrix if n > size.
	i += 1
	if i < n:
		newV[R[i]+move:] = V[R[i]:]
		newC[R[i]+move:] = C[R[i]:]
		added[i:] = move

	# Step 4: update the rowPointer.
	for i in range(n):
		a = added[i]
		if i < size:
			R[i] += a
		else:
			# Handles n > d --> just pad the rest of the rowPointer.
			for j in range(i, n):
				R[j] += a
			break
	R[n] += extend	# end of rowPointer equals the new nnz count, size(n+1)
	return newV, newC, R

def diagonal_add(val, colPointer, rowIndices, n, p, addon, copy = True):
	"""
	See _diagonal_add documentation.
	"""
	size = min(n, p)
	if ndim(addon) == 0:
		# Broadcast a scalar addon across the whole diagonal.
		A = zeros(size, dtype = val.dtype) + addon
	else:
		A = addon
	assert len(A) == size
	minimum = getDtype(min(n, p), size = 1, uint = False)
	return _diagonal_add(val, colPointer, rowIndices, n, p, minimum, A, copy)

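# Usage sketch (illustrative): diagonal_add should match a dense X + a*I on a
# square matrix. Assumes SciPy is installed; _demo_diagonal_add is a
# hypothetical helper name.
def _demo_diagonal_add():
	import numpy as np
	from scipy.sparse import random as sp_random, csr_matrix
	X = sp_random(5, 5, density = 0.4, format = "csr", random_state = 4)
	n, p = X.shape
	V, C, R = diagonal_add(X.data, X.indices, X.indptr, n, p, 2.0, copy = True)
	Y = csr_matrix((V, C, R), shape = (n, p))
	assert np.allclose(Y.toarray(), X.toarray() + 2.0*np.eye(n))
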
@njit(fastmath = True, nogil = True, cache = True)
def get_element(val, colPointer, rowIndices, n, p, i, j):
	"""
	[Added 14/10/2018]
	Gets element A[i, j]. HyperLearn's algorithm is O(log p), which is much
	better than Scipy's O(p), as HyperLearn uses binary search to find the
	element.
	"""
	assert i < n and j < p
	left = rowIndices[i]
	right = rowIndices[i+1]
	select = colPointer[left:right]
	search = searchsorted(select, j)
	if search < len(select):
		if select[search] == j:
			return val[left+search]
	return 0

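# Usage sketch (illustrative): get_element agrees with SciPy's indexing over
# every (i, j). Assumes SciPy is installed; _demo_get_element is a
# hypothetical helper name.
def _demo_get_element():
	from scipy.sparse import random as sp_random
	X = sp_random(5, 4, density = 0.5, format = "csr", random_state = 5)
	n, p = X.shape
	for i in range(n):
		for j in range(p):
			assert get_element(X.data, X.indices, X.indptr, n, p, i, j) == X[i, j]
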
def _mat_vec(val, colPointer, rowIndices, n, p, y):
	"""
	[Added 13/10/2018]
	Computes X @ y. Scipy & HyperLearn have similar speed. Notice that
	HyperLearn can now be parallelised! This reduces complexity to approx
	O(np/c), where c = number of threads / cores.
	"""
	Z = zeros(n, dtype = y.dtype)
	for i in prange(n):
		s = 0
		for j in range(rowIndices[i], rowIndices[i+1]):
			s += val[j]*y[colPointer[j]]
		Z[i] = s
	return Z
mat_vec = njit(_mat_vec, fastmath = True, nogil = True, cache = True)
mat_vec_parallel = njit(_mat_vec, fastmath = True, nogil = True, parallel = True)


def _matT_vec(val, colPointer, rowIndices, n, p, y):
	"""
	[Added 13/10/2018]
	Computes X.T @ y. Notice how, instead of converting the CSR matrix to
	CSC, X.T @ y is found directly. Same complexity as mat_vec(X, y). Also,
	HyperLearn is parallelised, allowing for O(np/c) complexity.
	"""
	Z = zeros(p, dtype = y.dtype)
	for i in prange(n):
		yi = y[i]
		for j in range(rowIndices[i], rowIndices[i+1]):
			Z[colPointer[j]] += val[j]*yi
	return Z
matT_vec = njit(_matT_vec, fastmath = True, nogil = True, cache = True)
matT_vec_parallel = njit(_matT_vec, fastmath = True, nogil = True, parallel = True)


def _mat_mat(val, colPointer, rowIndices, n, p, X):
	"""
	[Added 14/10/2018]
	Computes A @ X where X is a dense matrix. Mostly the same as Scipy,
	albeit slightly faster. The difference is that HyperLearn is now
	parallelised, which can reduce times by 1/2 or more.
	"""
	K = X.shape[1]
	Z = zeros((n, K), dtype = X.dtype)
	for i in prange(n):
		left = rowIndices[i]
		right = rowIndices[i+1]
		for k in range(K):
			s = 0
			for j in range(left, right):
				s += val[j]*X[colPointer[j], k]
			Z[i, k] = s
	return Z
mat_mat = njit(_mat_mat, fastmath = True, nogil = True, cache = True)
mat_mat_parallel = njit(_mat_mat, fastmath = True, nogil = True, parallel = True)


def _matT_mat(val, colPointer, rowIndices, n, p, X):
	"""
	[Added 14/10/2018]
	Computes A.T @ X where X is a dense matrix. Mostly the same as Scipy,
	albeit slightly faster. The difference is that HyperLearn is now
	parallelised, which can reduce times by 1/2 or more.
	"""
	K = X.shape[1]
	A = zeros((K, p), dtype = X.dtype)
	zero = zeros(p, dtype = X.dtype)
	for k in prange(K):
		Z = zero.copy()
		y = X[:,k]
		for i in range(n):
			yi = y[i]
			for j in range(rowIndices[i], rowIndices[i+1]):
				Z[colPointer[j]] += val[j]*yi
		for i in range(p):
			A[k, i] = Z[i]
	return A.T.copy()
matT_mat = njit(_matT_mat, fastmath = True, nogil = True, cache = True)
matT_mat_parallel = njit(_matT_mat, fastmath = True, nogil = True, parallel = True)

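# Usage sketch (illustrative): the matmul kernels agree with SciPy's @
# operator. Assumes SciPy is installed; _demo_matmul is a hypothetical
# helper name.
def _demo_matmul():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(6, 4, density = 0.5, format = "csr", random_state = 6)
	n, p = X.shape
	y = np.random.rand(p)
	yT = np.random.rand(n)
	W = np.random.rand(p, 3)
	assert np.allclose(mat_vec(X.data, X.indices, X.indptr, n, p, y), X @ y)
	assert np.allclose(matT_vec(X.data, X.indices, X.indptr, n, p, yT), X.T @ yT)
	assert np.allclose(mat_mat(X.data, X.indices, X.indptr, n, p, W), X @ W)
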
def XXT_sparse(val, colPointer, rowIndices, n, p):
	"""
	See _XXT documentation.
	"""
	D = zeros((n, n), dtype = val.dtype)
	P = zeros(p, dtype = val.dtype)
	for k in prange(n-1):
		l = rowIndices[k]
		r = rowIndices[k+1]
		# Scatter row k into a dense buffer for O(1) column lookups.
		R = P.copy()
		b = l
		for i in range(r-l):
			x = colPointer[b]
			R[x] = val[b]
			b += 1
		# Dot row k against every later row j.
		for j in prange(k+1, n):
			l = rowIndices[j]
			r = rowIndices[j+1]
			s = 0
			c = l
			for a in range(r-l):
				z = colPointer[c]
				v = R[z]
				if v != 0:
					s += v*val[c]
				c += 1
			D[j, k] = s
	return D

_XXT_sparse_single = njit(XXT_sparse, fastmath = True, nogil = True, cache = True)
_XXT_sparse_parallel = njit(XXT_sparse, fastmath = True, nogil = True, parallel = True)


def _XXT(val, colPointer, rowIndices, n, p, n_jobs = 1):
	"""
	[Added 16/10/2018]
	Computes X @ X.T very quickly: approx 50-60% faster than Sklearn's
	version, as it doesn't optimize a lot. Note: only the lower triangular
	part of X @ X.T is computed, and the diagonal is disregarded (set to 0).
	"""
	XXT = _XXT_sparse_parallel(val, colPointer, rowIndices, n, p) if n_jobs != 1 else \
		_XXT_sparse_single(val, colPointer, rowIndices, n, p)
	return XXT

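# Usage sketch (illustrative): _XXT returns only the strict lower triangle of
# X @ X.T, so the full Gram matrix can be rebuilt from its transpose plus the
# diagonal given by rowSum. Assumes SciPy is installed; _demo_XXT is a
# hypothetical helper name.
def _demo_XXT():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(6, 4, density = 0.5, format = "csr", random_state = 7)
	n, p = X.shape
	L = _XXT(X.data, X.indices, X.indptr, n, p, n_jobs = 1)
	G = L + L.T + np.diag(rowSum(X.data, X.indices, X.indptr))
	assert np.allclose(G, (X @ X.T).toarray())
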
@njit(fastmath = True, nogil = True, cache = True)
def rowSum(val, colPointer, rowIndices, norm = False):
	"""
	[Added 17/10/2018]
	Computes the row-wise sum of squares (rowSum**2) for a sparse matrix
	efficiently, instead of using einsum. If norm is True, returns the
	row-wise Euclidean norms instead.
	"""
	n = len(rowIndices)-1
	S = zeros(n, dtype = val.dtype)
	for i in range(n):
		s = 0
		l = rowIndices[i]
		r = rowIndices[i+1]
		b = l
		for j in range(r-l):
			Xij = val[b]
			s += Xij*Xij
			b += 1
		S[i] = s
	if norm:
		S **= 0.5
	return S
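
# Usage sketch (illustrative): rowSum matches summing the squared entries of
# each row; with norm = True it returns the row Euclidean norms. Assumes
# SciPy is installed; _demo_rowSum is a hypothetical helper name.
def _demo_rowSum():
	import numpy as np
	from scipy.sparse import random as sp_random
	X = sp_random(5, 4, density = 0.5, format = "csr", random_state = 8)
	S = rowSum(X.data, X.indices, X.indptr)
	assert np.allclose(S, np.asarray(X.multiply(X).sum(axis = 1)).ravel())
	norms = rowSum(X.data, X.indices, X.indptr, norm = True)
	assert np.allclose(norms, np.sqrt(S))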