Source code for VeraGridEngine.Utils.Sparse.csc_numba

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at https://mozilla.org/MPL/2.0/.  
# SPDX-License-Identifier: MPL-2.0



import numpy as np
import numba as nb
from numba.typed import List
import math
from typing import Tuple
from VeraGridEngine.basic_structures import Mat, Vec, IntVec

# @nb.njit("i4[:](i8)")

[docs]
@nb.njit(cache=True)
def ialloc(n):
    return np.zeros(n, dtype=nb.int32)



# @nb.njit("f8[:](i8)")

[docs]
@nb.njit(cache=True)
def xalloc(n):
    return np.zeros(n, dtype=nb.float64)



# @nb.njit("Tuple((i8, i8, i4[:], i4[:], f8[:], i8))(i8, i8, i8)")

[docs]
@nb.njit(cache=True)
def csc_spalloc_f(m, n, nzmax):
    """
    Allocate a sparse matrix (triplet form or compressed-column form).

    @param m: number of rows
    @param n: number of columns
    @param nzmax: maximum number of entries
    @return: m, n, Aindptr, Aindices, Adata, Anzmax
    """
    Anzmax = max(nzmax, 1)
    Aindptr = ialloc(n + 1)
    Aindices = ialloc(Anzmax)
    Adata = xalloc(Anzmax)
    return m, n, Aindptr, Aindices, Adata, Anzmax



# @nb.njit("(f8[:], f8[:], i8)")
@nb.njit(cache=True)
def _copy_f(src, dest, length):
    for i in range(length):
        dest[i] = src[i]


# @nb.njit("(i4[:], i4[:], i8)")
@nb.njit(cache=True)
def _copy_i(src, dest, length):
    for i in range(length):
        dest[i] = src[i]


# @nb.njit("i8(i4[:], i4[:], i8)")

[docs]
@nb.njit(cache=True)
def csc_cumsum_i(p, c, n):
    """
    p [0..n] = cumulative sum of c [0..n-1], and then copy p [0..n-1] into c

    @param p: size n+1, cumulative sum of c
    @param c: size n, overwritten with p [0..n-1] on output
    @param n: length of c
    @return: sum (c), null on error
    """
    nz = 0
    nz2 = 0.0

    for i in range(n):
        p[i] = nz
        nz += c[i]
        nz2 += c[i]              # also in double to avoid CS_INT overflow
        c[i] = p[i]             # also copy p[0..n-1] back into c[0..n-1]
    p[n] = nz
    return int(nz2)               # return sum (c [0..n-1])



# @nb.njit("Tuple((i4[:], f8[:], i8))(i8, i4[:], i4[:], f8[:], i8)")

[docs]
@nb.njit(cache=True)
def csc_sprealloc_f(An, Aindptr, Aindices, Adata, nzmax):
    """
    Change the max # of entries a sparse matrix can hold.
    :param An: number of columns
    :param Aindptr: csc column pointers
    :param Aindices: csc row indices
    :param Adata: csc data
    :param nzmax:new maximum number of entries
    :return: indices, data, nzmax
    """

    if nzmax <= 0:
        nzmax = Aindptr[An]

    length = min(nzmax, len(Aindices))
    Ainew = np.empty(nzmax, dtype=nb.int32)
    for i in range(length):
        Ainew[i] = Aindices[i]

    length = min(nzmax, len(Adata))
    Axnew = np.empty(nzmax, dtype=nb.float64)
    for i in range(length):
        Axnew[i] = Adata[i]

    return Ainew, Axnew, nzmax



# @nb.njit("i8(i4[:], i4[:], f8[:], i8, f8, i4[:], f8[:], i8, i4[:], i8)")

[docs]
@nb.njit(cache=True)
def csc_scatter_f(Ap, Ai, Ax, j, beta, w, x, mark, Ci, nz):
    """
    Scatters and sums a sparse vector A(:,j) into a dense vector, x = x + beta * A(:,j)
    :param Ap:
    :param Ai:
    :param Ax:
    :param j: the column of A to use
    :param beta: scalar multiplied by A(:,j)
    :param w: size m, node i is marked if w[i] = mark
    :param x: size m, ignored if null
    :param mark: mark value of w
    :param Ci: pattern of x accumulated in C.i
    :param nz: pattern of x placed in C starting at C.i[nz]
    :return: new value of nz, -1 on error, x and w are modified
    """

    for p in range(Ap[j], Ap[j + 1]):
        i = Ai[p]  # A(i,j) is nonzero
        if w[i] < mark:
            w[i] = mark  # i is new entry in column j
            Ci[nz] = i  # add i to pattern of C(:,j)
            nz += 1
            x[i] = beta * Ax[p]  # x(i) = beta*A(i,j)
        else:
            x[i] += beta * Ax[p]  # i exists in C(:,j) already
    return nz



# @nb.njit("i8(i4[:], i4[:], f8[:], i8, f8, i4[:], f8[:], i8, i4[:], i8)")

[docs]
@nb.njit(cache=True)
def csc_scatter_ff(Aindptr, Aindices, Adata, j, beta, w, x, mark, Ci, nz):
    """
    Scatters and sums a sparse vector A(:,j) into a dense vector, x = x + beta * A(:,j)
    :param Aindptr:
    :param Aindices:
    :param Adata:
    :param j: the column of A to use
    :param beta: scalar multiplied by A(:,j)
    :param w: size m, node i is marked if w[i] = mark
    :param x: size m, ignored if null
    :param mark: mark value of w
    :param Ci: pattern of x accumulated in C.i
    :param nz: pattern of x placed in C starting at C.i[nz]
    :return: new value of nz, -1 on error, x and w are modified
    """

    for p in range(Aindptr[j], Aindptr[j + 1]):
        i = Aindices[p]  # A(i,j) is nonzero
        if w[i] < mark:
            w[i] = mark  # i is new entry in column j
            Ci[nz] = i  # add i to pattern of C(:,j)
            nz += 1
            x[i] = beta * Adata[p]  # x(i) = beta*A(i,j)
        else:
            x[i] += beta * Adata[p]  # i exists in C(:,j) already
    return nz



# @nb.njit("Tuple((i8, i8, i4[:], i4[:], f8[:]))(i8, i8, i4[:], i4[:], f8[:], i8, i8, i4[:], i4[:], f8[:], f8, f8)")

[docs]
@nb.njit(cache=True)
def csc_add_ff(Am, An, Aindptr, Aindices, Adata,
               Bm, Bn, Bindptr, Bindices, Bdata, alpha, beta):
    """
    C = alpha*A + beta*B

    @param A: column-compressed matrix
    @param B: column-compressed matrix
    @param alpha: scalar alpha
    @param beta: scalar beta
    @return: C=alpha*A + beta*B, null on error (Cm, Cn, Cp, Ci, Cx)
    """
    nz = 0

    m, anz, n, Bp, Bx = Am, Aindptr[An], Bn, Bindptr, Bdata

    bnz = Bp[n]

    w = np.zeros(m, dtype=nb.int32)

    x = np.zeros(m, dtype=nb.float64)   # get workspace

    Cm, Cn, Cp, Ci, Cx, Cnzmax = csc_spalloc_f(m, n, anz + bnz)  # allocate result

    for j in range(n):
        Cp[j] = nz  # column j of C starts here

        nz = csc_scatter_f(Aindptr, Aindices, Adata, j, alpha, w, x, j + 1, Ci, nz)  # alpha*A(:,j)

        nz = csc_scatter_f(Bindptr, Bindices, Bdata, j, beta, w, x, j + 1, Ci, nz)  # beta*B(:,j)

        for p in range(Cp[j], nz):
            Cx[p] = x[Ci[p]]

    Cp[n] = nz  # finalize the last column of C

    return Cm, Cn, Cp, Ci, Cx  # success; free workspace, return C



# @nb.njit("Tuple((i8, i8, i4[:], i4[:], f8[:], i8))(i8, i8, i4[:], i4[:], f8[:], i8, i8, i4[:], i4[:], f8[:])",
#          parallel=False, nogil=True, fastmath=False, cache=True)  # fastmath=True breaks the code

[docs]
@nb.njit(cache=True)
def csc_multiply_ff(Am, An, Ap, Ai, Ax,
                    Bm, Bn, Bp, Bi, Bx):
    """
    Sparse matrix multiplication, C = A*B where A and B are CSC sparse matrices
    :param Am: number of rows in A
    :param An: number of columns in A
    :param Ap: column pointers of A
    :param Ai: indices of A
    :param Ax: data of A
    :param Bm: number of rows in B
    :param Bn: number of columns in B
    :param Bp: column pointers of B
    :param Bi: indices of B
    :param Bx: data of B
    :return: Cm, Cn, Cp, Ci, Cx, Cnzmax
    """
    assert An == Bm
    nz = 0
    anz = Ap[An]
    bnz = Bp[Bn]
    Cm = Am
    Cn = Bn

    t = nb
    w = np.zeros(Cn, dtype=t.int32)  # ialloc(m)  # get workspace
    x = np.empty(Cn, dtype=t.float64)  # xalloc(m)  # get workspace

    # allocate result

    Cnzmax = int(math.sqrt(Cm)) * anz + bnz  # the trick here is to allocate just enough memory to avoid reallocating
    Cp = np.empty(Cn + 1, dtype=t.int32)
    Ci = np.empty(Cnzmax, dtype=t.int32)
    Cx = np.empty(Cnzmax, dtype=t.float64)

    for j in range(Cn):

        # claim more space
        if nz + Cm > Cnzmax:
            # Ci, Cx, Cnzmax = csc_sprealloc_f(Cn, Cp, Ci, Cx, 2 * Cnzmax + m)
            print('Re-Allocating')
            Cnzmax = 2 * Cnzmax + Cm
            if Cnzmax <= 0:
                Cnzmax = Cp[An]

            length = min(Cnzmax, len(Ci))
            Cinew = np.empty(Cnzmax, dtype=nb.int32)
            for i in range(length):
                Cinew[i] = Ci[i]
            Ci = Cinew

            length = min(Cnzmax, len(Cx))
            Cxnew = np.empty(Cnzmax, dtype=nb.float64)
            for i in range(length):
                Cxnew[i] = Cx[i]
            Cx = Cxnew

        # column j of C starts here
        Cp[j] = nz

        # perform the multiplication
        for pb in range(Bp[j], Bp[j + 1]):
            for pa in range(Ap[Bi[pb]], Ap[Bi[pb] + 1]):
                ia = Ai[pa]
                if w[ia] < j + 1:
                    w[ia] = j + 1
                    Ci[nz] = ia
                    nz += 1
                    x[ia] = Bx[pb] * Ax[pa]
                else:
                    x[ia] += Bx[pb] * Ax[pa]

        for pc in range(Cp[j], nz):
            Cx[pc] = x[Ci[pc]]

    Cp[Cn] = nz  # finalize the last column of C

    # cut the arrays to their nominal size nnz
    # Ci, Cx, Cnzmax = csc_sprealloc_f(Cn, Cp, Ci, Cx, 0)
    Cnzmax = Cp[Cn]
    Cinew = Ci[:Cnzmax]
    Cxnew = Cx[:Cnzmax]

    return Cm, Cn, Cp, Cinew, Cxnew, Cnzmax



# @nb.njit("f8[:](i8, i8, i4[:], i4[:], f8[:], f8[:])", parallel=False)

[docs]
@nb.njit(cache=True)
def csc_mat_vec_ff(m, n, Ap, Ai, Ax, x):
    """
    Sparse matrix times dense column vector, y = A * x.
    :param m: number of rows
    :param n: number of columns
    :param Ap: pointers
    :param Ai: indices
    :param Ax: data
    :param x: vector x (n)
    :return: vector y (m)
    """

    assert n == x.shape[0]

    y = np.zeros(m, dtype=nb.float64)
    for j in range(n):
        for p in range(Ap[j], Ap[j + 1]):
            y[Ai[p]] += Ax[p] * x[j]
    return y




[docs]
@nb.njit(cache=True)
def diag_positions(n, Ap, Ai):
    """
    get the positions of the diagonal in the CSC data scheme
    :param n: number of rows and columns (must be square)
    :param Ap: pointers
    :param Ai: indices
    :return: vector pos (n)
    """

    pos = np.empty(n, dtype=nb.int32)
    k = 0
    for j in range(n):
        pos[j] = -1 # to make evident that there is o diagonal position

        p = Ap[j]
        found = False
        while p < Ap[j+1] and not found:

            if Ai[p] == j:
                pos[j] = k
                found = True

            p += 1
            k += 1
    return pos



# @nb.njit("Tuple((i8, i8, i4[:], i4[:], f8[:]))(i8, i8, i4[:], i4[:], f8[:], i8)")

[docs]
@nb.njit(cache=True)
def coo_to_csc(m, n, Ti, Tj, Tx, nnz):
    """
    C = compressed-column form of a triplet matrix T.
    The columns of T are not sorted, and duplicate entries may be present in T.

    :param m: row number
    :param n: column number
    :param Ti: array of row indices
    :param Tj: array of column indices
    :param Tx: array of data
    :param nnz: non-zero entries, if there are no duplicate entries, nnz = len(Tx)
    :return: Cm, Cn, Cp, Ci, Cx
    """

    Cm, Cn, Cp, Ci, Cx, nnz = csc_spalloc_f(m, n, nnz)  # allocate result

    w = np.zeros(n, dtype=nb.int32)  # get workspace

    for k in range(nnz):
        w[Tj[k]] += 1  # column counts

    csc_cumsum_i(Cp, w, n)  # column pointers

    for k in range(nnz):
        p = w[Tj[k]]
        w[Tj[k]] += 1
        Ci[p] = Ti[k]  # A(i,j) is the pth entry in C
        # if Cx is not None:
        Cx[p] = Tx[k]

    return Cm, Cn, Cp, Ci, Cx



# @nb.njit("void(i8, i8, i4[:], i4[:], f8[:], i4[:], i4[:], f8[:])")

[docs]
@nb.njit(cache=True)
def csc_to_csr(m, n, Ap, Ai, Ax, Bp, Bi, Bx):
    """
    Convert a CSC Matrix into a CSR Matrix
    :param m: number of rows
    :param n: number of columns
    :param Ap: indptr of the CSC matrix
    :param Ai: indices of the CSC matrix
    :param Ax: data of the CSC matrix
    :param Bp: indptr of the CSR matrix (to compute, size 'm+1', has to be initialized to zeros)
    :param Bi: indices of the CSR matrix (to compute, size nnz)
    :param Bx: data of the CSR matrix (to compute, size nnz)
    """
    nnz = Ap[n]

    for k in range(nnz):
        Bp[Ai[k]] += 1

    cum_sum = 0
    for col in range(m):
        temp = Bp[col]
        Bp[col] = cum_sum
        cum_sum += temp
    Bp[m] = nnz

    for row in range(n):
        for jj in range(Ap[row], Ap[row+1]):
            col = Ai[jj]
            dest = Bp[col]
            Bi[dest] = row
            Bx[dest] = Ax[jj]
            Bp[col] += 1

    last = 0
    for col in range(m):
        temp = Bp[col]
        Bp[col] = last
        last = temp



# @nb.njit("Tuple((i8, i8, i4[:], i4[:], f8[:]))(i8, i8, i4[:], i4[:], f8[:])")

[docs]
@nb.njit(cache=True)
def csc_transpose(m, n, Ap, Ai, Ax):
    """
    Transpose matrix
    :param m: A.m
    :param n: A.n
    :param Ap: A.indptr
    :param Ai: A.indices
    :param Ax: A.data
    :return: Cm, Cn, Cp, Ci, Cx
    """

    """
    Computes the transpose of a sparse matrix, C =A';

    @param A: column-compressed matrix
    @param allocate_values: pattern only if false, both pattern and values otherwise
    @return: C=A', null on error
    """

    Cm, Cn, Cp, Ci, Cx, Cnzmax = csc_spalloc_f(m=n, n=m, nzmax=Ap[n])  # allocate result

    w = ialloc(m)  # get workspace

    for p in range(Ap[n]):
        w[Ai[p]] += 1  # row counts

    csc_cumsum_i(Cp, w, m)  # row pointers

    for j in range(n):
        for p in range(Ap[j], Ap[j + 1]):
            q = w[Ai[p]]
            w[Ai[p]] += 1
            Ci[q] = j  # place A(i,j) as entry C(j,i)
            Cx[q] = Ax[p]

    return Cm, Cn, Cp, Ci, Cx



# @nb.njit("i4(i4, i4, i4[:])")

[docs]
@nb.njit(cache=True)
def binary_find(N, x, array):
    """
    Binary search
    :param N: size of the array
    :param x: value
    :param array: array
    :return: position where it is found. -1 if it is not found
    """
    lower = 0
    upper = N

    while (lower + 1) < upper:
        mid = int((lower + upper) / 2)
        if x < array[mid]:
            upper = mid
        else:
            lower = mid

    if array[lower] <= x:
        return lower
    return -1



# @nb.njit("Tuple((i8, i4[:], i4[:], f8[:]))(i8, i8, i4[:], i4[:], f8[:], i4[:], i4[:])")

[docs]
def csc_sub_matrix_old(Am, Anz, Ap, Ai, Ax, rows, cols):
    """
    Get SCS arbitrary sub-matrix
    :param Am: number of rows
    :param Anz: number of non-zero entries
    :param Ap: Column pointers
    :param Ai: Row indices
    :param Ax: Data
    :param rows: row indices to keep
    :param cols: column indices to keep
    :return: CSC sub-matrix (n, new_col_ptr, new_row_ind, new_val)
    """
    n_cols = len(cols)

    Bx = np.zeros(Anz, dtype=np.float64)
    Bi = np.empty(Anz, dtype=np.int32)
    Bp = np.empty(n_cols + 1, dtype=np.int32)

    n = 0
    p = 0
    Bp[p] = 0

    for j in cols:  # for each column selected ...
        i = 0
        for r in rows:
            for k in range(Ap[j], Ap[j + 1]):  # for each row of the column j of A...
                if Ai[k] == r:
                    Bx[n] = Ax[k]  # store the value
                    Bi[n] = i  # row index in the new matrix
                    i += 1
                    n += 1
            if i == 0:
                i += 1
        p += 1
        Bp[p] = n

    Bp[p] = n

    return n, Bp, Bi[:n], Bx[:n]




[docs]
@nb.njit(cache=True)
def csc_sub_matrix(Am, Annz, Ap, Ai, Ax, rows, cols):
    """
    CSC matrix sub-matrix view
    :param Am: number of rows
    :param Annz: number of non-zero entries
    :param Ap: Column pointers
    :param Ai: Row indices
    :param Ax: Data
    :param rows: array of selected rows: must be sorted! to use the binary search
    :param cols: array of columns: should be sorted
    :return:
    """
    n_rows = len(rows)
    n_cols = len(cols)

    nnz = 0
    p = 0
    Bx = np.empty(Annz, dtype=nb.float64)  # data
    Bi = np.empty(Annz, dtype=nb.int32)  # indices
    Bp = np.empty(n_cols + 1, dtype=nb.int32)  # pointers
    Bp[p] = 0

    # generate lookup for the non immediate axis (for CSC it is the rows) -> index lookup
    lookup = np.zeros(Am, dtype=nb.int32)
    lookup[rows] = np.arange(len(rows), dtype=nb.int32)

    for j in cols:  # sliced columns

        for k in range(Ap[j], Ap[j + 1]):  # rows of A[:, j]

            # row index translation to the "rows" space
            i = Ai[k]
            ii = lookup[i]

            if rows[ii] == i:
                # entry found
                Bx[nnz] = Ax[k]
                Bi[nnz] = ii
                nnz += 1

        p += 1
        Bp[p] = nnz

    Bp[p] = nnz

    # numba does not support resize...
    # new_val = np.resize(new_val, nnz)
    # new_row_ind = np.resize(new_row_ind, nnz)

    return Bx, Bi, Bp, n_rows, n_cols, nnz




[docs]
@nb.njit(cache=True)
def csc_sub_matrix_cols(Am, Anz, Ap, Ai, Ax, cols):
    """
    Get SCS arbitrary sub-matrix with all the rows
    :param Am: number of rows
    :param Anz: number of non-zero entries
    :param Ap: Column pointers
    :param Ai: Row indices
    :param Ax: Data
    :param cols: column indices to keep
    :return: CSC sub-matrix (n, new_col_ptr, new_row_ind, new_val)
    """

    n_cols = len(cols)
    n = 0
    p = 0
    Bx = np.empty(Anz, dtype=nb.float64)
    Bi = np.empty(Anz, dtype=nb.int32)
    Bp = np.empty(n_cols + 1, dtype=nb.int32)

    Bp[p] = 0

    for j in cols:  # for each column selected ...
        for k in range(Ap[j], Ap[j + 1]):  # for each row of the column j of A...
            # store the values if the row was found in rows
            Bx[n] = Ax[k]  # store the value
            Bi[n] = Ai[k]  # store the row index
            n += 1
        p += 1
        Bp[p] = n

    Bp[p] = n

    return n, Bp, Bi[:n], Bx[:n]




[docs]
def csc_sub_matrix_rows(An, Anz, Ap, Ai, Ax, rows):
    """
    Get SCS arbitrary sub-matrix
    :param An: number of rows
    :param Anz: number of non-zero entries
    :param Ap: Column pointers
    :param Ai: Row indices
    :param Ax: Data
    :param rows: row indices to keep
    :return: CSC sub-matrix (n, new_col_ptr, new_row_ind, new_val)
    """
    n_rows = len(rows)
    n = 0
    p = 0
    Bx = np.zeros(Anz, dtype=np.float64)
    Bi = np.empty(Anz, dtype=np.int32)
    Bp = np.empty(An + 1, dtype=np.int32)

    Bp[p] = 0

    for j in range(An):  # for each column selected ...
        i = 0
        for r in rows:
            for k in range(Ap[j], Ap[j + 1]):  # for each row of the column j of A...
                if Ai[k] == r:
                    Bx[n] = Ax[k]  # store the value
                    Bi[n] = i  # row index in the new matrix
                    n += 1
                    i += 1
            if i == 0:
                i += 1
        p += 1
        Bp[p] = n

    Bp[p] = n

    return n, Bp, Bi[:n], Bx[:n]



# @nb.njit("f8[:, :](i8, i8, i4[:], i4[:], f8[:])")

[docs]
def csc_to_dense(m, n, indptr, indices, data):
    """
    Convert csc matrix to dense
    :param m:
    :param n:
    :param indptr:
    :param indices:
    :param data:
    :return: 2d numpy array
    """
    val = np.zeros((m, n), dtype=np.float64)

    for j in range(n):
        for p in range(indptr[j], indptr[j + 1]):
            val[indices[p], j] = data[p]
    return val



# @nb.njit("Tuple((i4[:], i4[:], f8[:]))(i8, f8)")

[docs]
@nb.njit(cache=True)
def csc_diagonal(m, value=1.0):
    """
    Build CSC diagonal matrix of the given value
    :param m: size
    :param value: value
    :return: CSC matrix
    """
    indptr = np.empty(m + 1, dtype=np.int32)
    indices = np.empty(m, dtype=np.int32)
    data = np.empty(m, dtype=np.float64)
    for i in range(m):
        indptr[i] = i
        indices[i] = i
        data[i] = value
    indptr[m] = m

    return indices, indptr, data



# @nb.njit("Tuple((i4[:], i4[:], f8[:]))(i8, f8[:])")

[docs]
@nb.njit(cache=True)
def csc_diagonal_from_array(array):
    """

    :param m:
    :param array:
    :return:
    """
    m = len(array)
    indptr = np.zeros(m + 1, dtype=np.int32)
    indices = np.zeros(m, dtype=np.int32)
    data = np.zeros(m)

    for i in range(m):
        indptr[i] = i
        indices[i] = i
        data[i] = array[i]

    indptr[m] = m

    return data, indices, indptr




[docs]
@nb.njit(cache=True)
def csc_diagonal_from_complex_array(array):
    """

    :param m:
    :param array:
    :return:
    """
    m = len(array)
    indptr = np.zeros(m + 1, dtype=np.int32)
    indices = np.zeros(m, dtype=np.int32)
    data = np.zeros(m, dtype=nb.complex128)

    for i in range(m):
        indptr[i] = i
        indices[i] = i
        data[i] = array[i]

    indptr[m] = m

    return data, indices, indptr




[docs]
@nb.njit(cache=True)
def csc_diagonal_from_number(m: int, value: float):
    """

    :param m:
    :param value:
    :return:
    """
    indptr = np.zeros(m + 1, dtype=np.int32)
    indices = np.zeros(m, dtype=np.int32)
    data = np.zeros(m)

    for i in range(m):
        indptr[i] = i
        indices[i] = i
        data[i] = value

    indptr[m] = m

    return data, indices, indptr




[docs]
@nb.njit(cache=True)
def csc_stack_4_by_4_ff(am, an, Ai, Ap, Ax,
                        bm, bn, Bi, Bp, Bx,
                        cm, cn, Ci, Cp, Cx,
                        dm, dn, Di, Dp, Dx):
    """
    stack csc sparse float matrices like this:
    | A | B |
    | C | D |

    :param am:
    :param an:
    :param Ai:
    :param Ap:
    :param Ax:
    :param bm:
    :param bn:
    :param Bi:
    :param Bp:
    :param Bx:
    :param cm:
    :param cn:
    :param Ci:
    :param Cp:
    :param Cx:
    :param dm:
    :param dn:
    :param Di:
    :param Dp:
    :param Dx:
    :return:
    """

    # check dimensional compatibility
    assert am == bm
    assert cm == dm
    assert an == cn
    assert bn == dn

    nnz = Ap[an] + Bp[bn] + Cp[cn] + Dp[dn]

    m = am + cm
    n = an + bn

    indptr = np.zeros(n + 1, dtype=nb.int32)
    indices = np.zeros(nnz, dtype=nb.int32)
    data = np.zeros(nnz, dtype=nb.float64)
    cnt = 0
    indptr[0] = 0
    for j in range(an):  # for every column, same as range(cols + 1) For A and C
        for k in range(Ap[j], Ap[j + 1]):  # for every entry in the column from A
            indices[cnt] = Ai[k]  # row index
            data[cnt] = Ax[k]
            cnt += 1

        for k in range(Cp[j], Cp[j + 1]):  # for every entry in the column from C
            indices[cnt] = Ci[k] + am  # row index
            data[cnt] = Cx[k]
            cnt += 1

        indptr[j + 1] = cnt

    for j in range(bn):  # for every column, same as range(cols + 1) For B and D
        for k in range(Bp[j], Bp[j + 1]):  # for every entry in the column from B
            indices[cnt] = Bi[k]  # row index
            data[cnt] = Bx[k]
            cnt += 1

        for k in range(Dp[j], Dp[j + 1]):  # for every entry in the column from D
            indices[cnt] = Di[k] + bm  # row index
            data[cnt] = Dx[k]
            cnt += 1

        indptr[an + j + 1] = cnt

    return m, n, indices, indptr, data





[docs]
@nb.njit(cache=True)
def csc_stack_3_by_4_ff(am, an, Ai, Ap, Ax,
                        bm, bn, Bi, Bp, Bx,
                        cm, cn, Ci, Cp, Cx):
    """
    stack csc sparse float matrices like this:
    | A | B |
    | C | 0 |

    :param am:
    :param an:
    :param Ai:
    :param Ap:
    :param Ax:
    :param bm:
    :param bn:
    :param Bi:
    :param Bp:
    :param Bx:
    :param cm:
    :param cn:
    :param Ci:
    :param Cp:
    :param Cx:
    :return:
    """

    # check dimensional compatibility
    assert am == bm
    assert an == cn

    nnz = Ap[an] + Bp[bn] + Cp[cn]

    m = am + cm
    n = an + bn

    indptr = np.zeros(n + 1, dtype=nb.int32)
    indices = np.zeros(nnz, dtype=nb.int32)
    data = np.zeros(nnz, dtype=nb.float64)
    cnt = 0
    indptr[0] = 0
    for j in range(an):  # for every column, same as range(cols + 1) For A and C
        for k in range(Ap[j], Ap[j + 1]):  # for every entry in the column from A
            indices[cnt] = Ai[k]  # row index
            data[cnt] = Ax[k]
            cnt += 1

        for k in range(Cp[j], Cp[j + 1]):  # for every entry in the column from C
            indices[cnt] = Ci[k] + am  # row index
            data[cnt] = Cx[k]
            cnt += 1

        indptr[j + 1] = cnt

    for j in range(bn):  # for every column, same as range(cols + 1) For B and D
        for k in range(Bp[j], Bp[j + 1]):  # for every entry in the column from B
            indices[cnt] = Bi[k]  # row index
            data[cnt] = Bx[k]
            cnt += 1

        indptr[an + j + 1] = cnt

    return m, n, indices, indptr, data




[docs]
@nb.njit(cache=True)
def csc_norm(n, Ap, Ax):
    """
    Computes the 1-norm of a sparse matrix = max (sum (abs (A))), largest
    column sum.

    @param A: column-compressed matrix
    @return: the 1-norm if successful, -1 on error
    """
    norm = 0

    for j in range(n):
        s = 0
        for p in range(Ap[j], Ap[j + 1]):
            s += abs(Ax[p])
        norm = max(norm, s)
    return norm




[docs]
@nb.njit(cache=True)
def find_islands(node_number, indptr, indices):
    """
    Method to get the islands of a graph
    This is the non-recursive version
    :return: islands list where each element is a list of the node indices of the island
    """

    # Mark all the vertices as not visited
    visited = np.zeros(node_number, dtype=nb.boolean)

    # storage structure for the islands (list of lists)
    islands = List.empty_list(List.empty_list(nb.int64))

    # set the island index
    island_idx = 0

    # go though all the vertices...
    for node in range(node_number):

        # if the node has not been visited...
        if not visited[node]:

            # add new island, because the recursive process has already visited all the island connected to v
            # if island_idx >= len(islands):
            islands.append(List.empty_list(nb.int64))

            # ------------------------------------------------------------------------------------------------------
            # DFS: store in the island all the reachable vertices from current vertex "node"
            #
            # declare a stack with the initial node to visit (node)
            stack = List.empty_list(nb.int64)
            stack.append(node)

            while len(stack) > 0:

                # pick the first element of the stack
                v = stack.pop(0)

                # if v has not been visited...
                if not visited[v]:

                    # mark as visited
                    visited[v] = True

                    # add element to the island
                    islands[island_idx].append(v)

                    # Add the neighbours of v to the stack
                    start = indptr[v]
                    end = indptr[v + 1]
                    for i in range(start, end):
                        k = indices[i]  # get the column index in the CSC scheme
                        if not visited[k]:
                            stack.append(k)
            # ------------------------------------------------------------------------------------------------------

            # increase the islands index, because all the other connected vertices have been visited
            island_idx += 1

    # sort the islands to maintain raccord
    # for island in islands:
    #     island.sort()
    return islands




# @nb.njit("Tuple((i4[:], i4[:], f8[:], i8, i8))(i8, i4[:], i4[:], f8[:], i8[:])")

[docs]
@nb.njit(cache=True)
def sp_submat_c_numba(nrows, ptrs, indices, values, cols):
    """
    slice CSC columns
    :param nrows: number of rows of the matrix
    :param ptrs: row pointers
    :param indices: column indices
    :param values: data
    :param cols: vector of columns to slice
    :return: new_indices, new_col_ptr, new_val, nrows, ncols
    """
    # pass1: determine the number of non-zeros
    nnz = 0
    for j in cols:
        for k in range(ptrs[j], ptrs[j+1]):
            nnz += 1

    # pass2: size the vector and perform the slicing
    ncols = len(cols)
    n = 0
    p = 0

    new_val = np.empty(nnz, dtype=nb.float64)
    new_indices = np.empty(nnz, dtype=nb.int32)
    new_col_ptr = np.empty(ncols + 1, dtype=nb.int32)

    new_col_ptr[p] = 0

    for j in cols:
        for k in range(ptrs[j], ptrs[j + 1]):
            new_val[n] = values[k]
            new_indices[n] = indices[k]
            n += 1

        p += 1
        new_col_ptr[p] = n

    return new_indices, new_col_ptr, new_val, nrows, ncols




[docs]
@nb.njit(nogil=True, fastmath=True, cache=True)
def csc_stack_2d_ff_row_major(mats_data, mats_indptr, mats_indices, mats_cols, mats_rows, m_rows=1, m_cols=1):
    """
    Assemble matrix from a list of matrices representing a "super matrix"

    |mat11 | mat12 | mat13 |
    |mat21 | mat22 | mat23 |

    turns into:

    mats = [mat11, mat12, mat13, mat21, mat22, mat23]
    m_rows = 2
    m_cols = 3

    :param mats_data: array of numpy arrays with the data of each CSC matrix
    :param mats_indptr: array of numpy arrays with the indptr of each CSC matrix
    :param mats_indices: array of numpy arrays with the indices of each CSC matrix
    :param mats_cols: array with the number of columns of each CSC matrix
    :param mats_rows: array with the number of rows of each CSC matrix
    :param m_rows: number of rows of the mats structure
    :param m_cols: number of cols of the mats structure
    :return: Final assembled matrix
    """

    """
    Row major: A(r,c) element is at A[c + r * n_columns]; 
    Col major: A(r,c) element is at A[r + c * n_rows];    
    """

    # pass 1: compute the number of non zero
    nnz = 0
    nrows = 0
    ncols = 0
    for r in range(m_rows):
        nrows += mats_rows[r * m_cols]  # equivalent to mats[r, 0]
        for c in range(m_cols):
            col = mats_cols[c + r * m_cols]  # equivalent to mats[r, c]
            nnz += mats_indptr[r * m_cols + c][col]
            if r == 0:
                ncols += col

    # pass 2: fill in the data
    indptr = np.empty(ncols + 1, dtype=np.int32)
    indices = np.empty(nnz, dtype=np.int32)
    data = np.empty(nnz, dtype=np.float64)
    cnt = 0
    indptr[0] = 0
    offset_col = 0
    for c in range(m_cols):  # for each column of the array of matrices

        # number of columns
        n = mats_cols[c]  # equivalent to mats[0, c]

        if n > 0:
            for j in range(n):  # for every column of the column of matrices

                offset_row = 0

                for r in range(m_rows):  # for each row of the array of rows

                    # number of rows
                    m = mats_rows[r * m_cols + c]  # equivalent to mats[r, c].shape[0]

                    if m > 0:
                        Ap = mats_indptr[r * m_cols + c]
                        Ai = mats_indices[r * m_cols + c]
                        Ax = mats_data[r * m_cols + c]

                        for k in range(Ap[j], Ap[j + 1]):  # for every entry in the column from A
                            indices[cnt] = Ai[k] + offset_row  # row index
                            data[cnt] = Ax[k]
                            cnt += 1

                        offset_row += m

                indptr[offset_col + j + 1] = cnt
            offset_col += n

    return data, indices, indptr, nrows, ncols




[docs]
@nb.njit(nogil=True, fastmath=True, cache=True)
def csc_stack_2d_ff_col_major(mats_data, mats_indptr, mats_indices, mats_cols, mats_rows, m_rows=1, m_cols=1):
    """
    Assemble matrix from a list of matrices representing a "super matrix"

    |mat11 | mat12 | mat13 |
    |mat21 | mat22 | mat23 |

    turns into:

    mats = [mat11, mat12, mat13, mat21, mat22, mat23]
    m_rows = 2
    m_cols = 3

    :param mats_data: array of numpy arrays with the data of each CSC matrix
    :param mats_indptr: array of numpy arrays with the indptr of each CSC matrix
    :param mats_indices: array of numpy arrays with the indices of each CSC matrix
    :param mats_cols: array with the number of columns of each CSC matrix
    :param mats_rows: array with the number of rows of each CSC matrix
    :param m_rows: number of rows of the mats structure
    :param m_cols: number of cols of the mats structure
    :return: Final assembled matrix
    """

    """
    Col major: A(r,c) element is at A[r + c * n_rows];    
    """

    # pass 1: compute the number of non zero
    nnz = 0
    nrows = 0
    ncols = 0
    for r in range(m_rows):
        nrows += mats_rows[r]  # equivalent to mats[r, 0]
        for c in range(m_cols):
            col = mats_cols[r + c * m_rows]  # equivalent to mats[r, c]
            nnz += mats_indptr[r + c * m_rows][col]
            if r == 0:
                ncols += col

    # pass 2: fill in the data
    indptr = np.empty(ncols + 1, dtype=np.int32)
    indices = np.empty(nnz, dtype=np.int32)
    data = np.empty(nnz, dtype=np.float64)
    cnt = 0
    indptr[0] = 0
    offset_col = 0
    for c in range(m_cols):  # for each column of the array of matrices

        # number of columns
        n = mats_cols[c * m_rows]  # equivalent to mats[0, c]

        if n > 0:
            for j in range(n):  # for every column of the column of matrices

                offset_row = 0

                for r in range(m_rows):  # for each row of the array of rows

                    # number of rows
                    m = mats_rows[r + c * m_rows]  # equivalent to mats[r, c].shape[0]

                    if m > 0:
                        Ap = mats_indptr[r + c * m_rows]
                        Ai = mats_indices[r + c * m_rows]
                        Ax = mats_data[r + c * m_rows]

                        for k in range(Ap[j], Ap[j + 1]):  # for every entry in the column from A
                            indices[cnt] = Ai[k] + offset_row  # row index
                            data[cnt] = Ax[k]
                            cnt += 1

                        offset_row += m

                indptr[offset_col + j + 1] = cnt
            offset_col += n

    return data, indices, indptr, nrows, ncols




[docs]
@nb.njit(cache=True)
def dense_to_csc_numba(mat: Mat, threshold: float) -> Tuple[Vec, IntVec, IntVec]:
    """
    Extract the sparse matrix from a dense matrix where abs values are below a threshold
    :param mat: dense matrix
    :param threshold: threshold
    :return: data, indices, indptr
    """
    n_row, n_col = mat.shape

    indptr = np.empty(n_col + 1)
    indices = np.empty(n_row * n_col)
    data = np.empty(n_row * n_col)
    k = 0
    for j in range(n_col):

        indptr[j] = k

        for i in range(n_row):

            if abs(mat[i, j]) > threshold:
                data[k] = mat[i, j]
                indices[k] = i
                k += 1

    indptr[n_col] = k
    if k < (n_col * n_row):
        data = data[:k]
        indices = indices[:k]

    return data, indices, indptr




[docs]
@nb.njit(cache=True)
def get_sparse_array_numba(arr: Vec, threshold: float) -> Tuple[Vec, IntVec]:
    """
    Extract the sparse array from a dense array where abs values are below a threshold
    :param arr: dense matrix
    :param threshold: threshold
    :return: data, indices
    """
    n = len(arr)

    indices = np.empty(n, dtype=nb.int32)
    data = np.empty(n)
    k = 0
    for i in range(n):

        if abs(arr[i]) > threshold:
            data[k] = arr[i]
            indices[k] = i
            k += 1

    if k < n:
        data = data[:k]
        indices = indices[:k]

    return data, indices