Hi Julians,

I’ve ported this matlab code from Clustering by low-rank doubly stochastic matrix decomposition to Julia. But it’s much too slow for my application.

I’m still green to Julia so posting here to see if there’s any performance tips (or glaring *aesthetic* or other improvements) to make it faster. Especially on the sparse multiplication front.

I failed to get LoopVectorization to work or threading to speed it up.

test with

```
A = sprand(1000, 1000, 0.25)
# 20 clusters, dirichlet ~ 1, 1000 iters
Wout, ce = dcd(A, 20, nothing, 1, 1000)
```

Looking to do this on matrices in the 100k to millions.

```
using LinearAlgebra, Random, SparseArrays
function dcd(A::SparseMatrixCSC{Tv, Ti},
r::Int,
W0 = nothing,
alpha::Real = 1.,
max_iter::Int = 10000) where {Tv, Ti}
n = size(A, 1);
if W0 === nothing
W0 = rand(n, r);
end
W0 ./= sum(W0, dims = 2) .+ eps()
W = W0;
I_index, J_index, Anz = findnz(A);
ZW = zeros(Tv, n, r)
for iter ∈ 1:max_iter
inv_s = 1 ./ (sum(W, dims = 1) .+ eps())
inv_W = 1 ./ (W .+ eps());
# created ZW up top to make this in-place, does that make much of a difference?
# ZW = sp_factor_ratio(I_index, J_index, A, W .* inv_s, W') * W
# sp_factor_ratio is defined below it does
# Z = A ./ ( (W .* inv_s) * W' ) but faster
mul!(ZW, sp_factor_ratio(I_index, J_index, A, W .* inv_s, W'), W)
gradn = 2 * ZW .* inv_s .+ alpha * inv_W
gradp = ( diag(W' * ZW)' .* inv_s .^ 2 ) .+ inv_W
a = sum(W ./ (gradp .+ eps()), dims = 2);
b = sum(W .* gradn ./ (gradp .+ eps()), dims = 2);
# is it better to break these up like below or keep it as one calculation?
# W = W .* (gradn .* a .+ 1) ./ ( (gradp .* a) .+ b .+ eps())
# @. W *= (gradn * a + 1) / ( (gradp * a) + b + eps())
W .*= gradn .* a .+ 1
W ./= ( (gradp .* a) .+ b .+ eps())
end
Ahat = sum( sp_factor(I_index, J_index, W, W') ./ (sum(W, dims = 1) .+ eps()), dims = 2)
cross_entropy = -Anz' * log.(vec(Ahat) .+ eps());
return W , cross_entropy
end
# Helps calculate the cross-entropy in a sparse manner
function sp_factor(I::AbstractVector, J::AbstractVector,
W::AbstractArray{T,N}, H::AbstractArray{T,N}) where {T, N}
nz = size(I, 1)
m = size(W, 1)
r = size(W, 2)
WH = zeros(nz, r)
for t = 1:nz
i = I[t]
j = J[t]
for k = 1:r
WH[t, k] = W[i, k] * H[k, j]
end
end
return WH;
end
# calculates
# Z = X ./ ( (W .* inv_s) * H )
function sp_factor_ratio(I::AbstractVector, J::AbstractVector,
X::SparseMatrixCSC{Tv, Ti}, W::Matrix{T},
H::AbstractArray{T, N}) where {Tv, Ti, T, N}
n = size(X, 2)
r = size(W, 2)
irs = X.rowval
jcs = X.colptr
Z = sparse(I, J, zeros(size(I, 1)))
ind = 1
for row = 1:n # row
col_start = jcs[row]
col_stop = jcs[row + 1]
if (col_start != col_stop)
for current_col_index = col_start:col_stop - 1 # col
col = irs[ind];
xhat = 0;
for k = 1:r
xhat += W[row, k] * H[k, col];
end
xhat = max(xhat, eps());
Z.nzval[ind] = X.nzval[ind] / xhat;
ind += 1
end
end
end
return Z
end
```