From 23bc7c94ca3169e4e33743fb3ccaf4eb9ec43b4c Mon Sep 17 00:00:00 2001 From: Sobhan Mohammadpour Date: Sun, 5 Jun 2022 14:24:29 -0400 Subject: [PATCH 1/2] make lu! allocate less if possible --- src/solvers/umfpack.jl | 45 +++++++++++++++++++++++++++++++----------- 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/solvers/umfpack.jl b/src/solvers/umfpack.jl index e01db631..0d19e56f 100644 --- a/src/solvers/umfpack.jl +++ b/src/solvers/umfpack.jl @@ -139,6 +139,9 @@ function show_umf_info(control::Vector{Float64}, info::Vector{Float64}=umf_info, end +# TODO, this actually doesn't need to be this big if iter refinement is off +worspace_W_size(S::SparseMatrixCSC{Float64}) = 5 * size(S, 2) +worspace_W_size(S::SparseMatrixCSC{ComplexF64}) = 10 * size(S, 2) """ @@ -152,9 +155,16 @@ struct UmfpackWS{T<:UMFITypes} W::Vector{Float64} end -# TODO, this actually doesn't need to be this big if iter refinement is off -UmfpackWS(S::SparseMatrixCSC{Float64, T}) where T = UmfpackWS{T}(Vector{T}(undef, size(S, 2)), Vector{Float64}(undef, 5 * size(S, 2))) -UmfpackWS(S::SparseMatrixCSC{ComplexF64, T}) where T = UmfpackWS{T}(Vector{T}(undef, size(S, 2)), Vector{Float64}(undef, 10 * size(S, 2))) +function Base.resize!(W::UmfpackWS, S::SparseMatrixCSC) + resize!(W.Wi, size(S, 2)) + resize!(W.W, worspace_W_size(S)) +end + + +UmfpackWS(S::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = UmfpackWS{Ti}( + Vector{Ti}(undef, size(S, 2)), + Vector{Float64}(undef, worspace_W_size(S))) + Base.similar(w::UmfpackWS) = UmfpackWS(similar(w.Wi), similar(w.W)) @@ -338,19 +348,32 @@ julia> F \\ ones(2) 1.0 ``` """ -function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,<:UMFITypes}; check::Bool=true) +function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,Ti}; check::Bool=true) where {Ti<:UMFITypes} zerobased = getcolptr(S)[1] == 0 - # resize workspace if needed - if F.n < size(S, 2) - F.workspace = UmfpackWS(S) - end F.m = size(S, 1) F.n = size(S, 2) - F.colptr = zerobased ? copy(getcolptr(S)) : decrement(getcolptr(S)) - F.rowval = zerobased ? copy(rowvals(S)) : decrement(rowvals(S)) - F.nzval = copy(nonzeros(S)) + # resize workspace if needed + resize!(F.workspace, S) + + resize!(F.colptr, length(getcolptr(S))) + if zerobased + copy!(F.colptr, getcolptr(S)) + else + F.colptr .= getcolptr(S) .- one(Ti) + end + + resize!(F.rowval, length(rowvals(S))) + if zerobased + copy!(F.rowval, rowvals(S)) + else + F.rowval .= rowvals(S) .- one(Ti) + end + + resize!(F.nzval, length(nonzeros(S))) + copy!(F.nzval, nonzeros(S)) + umfpack_numeric!(F, reuse_numeric = false) check && (issuccess(F) || throw(LinearAlgebra.SingularException(0))) return F From 384070500a0f83d13f91bc762ba9f8ae2fc82757 Mon Sep 17 00:00:00 2001 From: Sobhan Mohammadpour Date: Thu, 23 Jun 2022 00:25:38 -0400 Subject: [PATCH 2/2] make lu! allocated less, change the lock types --- src/solvers/umfpack.jl | 190 ++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 106 deletions(-) diff --git a/src/solvers/umfpack.jl b/src/solvers/umfpack.jl index 512950d5..72f1a5c8 100644 --- a/src/solvers/umfpack.jl +++ b/src/solvers/umfpack.jl @@ -140,15 +140,15 @@ end # TODO, this actually doesn't need to be this big if iter refinement is off -worspace_W_size(S::SparseMatrixCSC{Float64}) = 5 * size(S, 2) -worspace_W_size(S::SparseMatrixCSC{ComplexF64}) = 10 * size(S, 2) +worspace_W_size(S::SparseMatrixCSC{<:AbstractFloat}) = 5 * size(S, 2) +worspace_W_size(S::SparseMatrixCSC{<:Complex}) = 10 * size(S, 2) """ Working space for Umfpack so `ldiv!` doesn't allocate. -To use multiple threads, each thread should have their own workspace that can be allocated using `Base.similar(::UmfpackWS)` and passed as a kwarg to `ldiv!`. -Alternativly see `duplicate(::UmfpackLU)` +To use multiple threads, each thread should have their own workspace that can be allocated using `Base.similar(::UmfpackWS)` +and passed as a kwarg to `ldiv!`. Alternativly see `duplicate(::UmfpackLU)` """ struct UmfpackWS{T<:UMFITypes} Wi::Vector{T} @@ -161,8 +161,8 @@ function Base.resize!(W::UmfpackWS, S::SparseMatrixCSC) end -UmfpackWS(S::SparseMatrixCSC{Tv, Ti}) where {Tv, Ti} = UmfpackWS{Ti}( - Vector{Ti}(undef, size(S, 2)), +UmfpackWS(S::SparseMatrixCSC{Tv,Ti}) where {Tv,Ti} = UmfpackWS{Ti}( + Vector{Ti}(undef, size(S, 2)), Vector{Float64}(undef, worspace_W_size(S))) @@ -185,7 +185,7 @@ mutable struct UmfpackLU{Tv<:UMFVTypes,Ti<:UMFITypes} <: Factorization{Tv} end """ -A shallow copy of UmfpackLU to use simultaniously +A shallow copy of UmfpackLU to use in multithreaded applications """ duplicate(F::UmfpackLU) = UmfpackLU( F.symbolic, @@ -203,31 +203,33 @@ duplicate(F::UmfpackLU) = UmfpackLU( Base.adjoint(F::UmfpackLU) = Adjoint(F) Base.transpose(F::UmfpackLU) = Transpose(F) -function Base.lock(F::UmfpackLU) - if !trylock(F.lock) - @info """waiting for UmfpackLU's lock, it's safe to ignore this message. - see the documentation for Umfpack""" maxlog=1 - lock(F.lock) +# is needed for some reason +function Base.lock(f::Function, F::UmfpackLU) + lock(F) + try + f() + finally + unlock(F) end end +function Base.lock(F::UmfpackLU) + trylock(F.lock) && return + @info """waiting for UmfpackLU's lock, it's safe to ignore this message. + see the documentation for Umfpack""" maxlog = 1 + lock(F.lock) +end @inline Base.trylock(F::UmfpackLU) = trylock(F.lock) @inline Base.unlock(F::UmfpackLU) = unlock(F.lock) -function show_umf_ctrl(F::UmfpackLU, level::Real = 2.0) - lock(F) - try +function show_umf_ctrl(F::UmfpackLU, level::Real=2.0) + lock(F) do show_umf_ctrl(F.control, level) - finally - unlock(F) end end -function show_umf_info(F::UmfpackLU, level::Real = 2.0) - lock(F) - try +function show_umf_info(F::UmfpackLU, level::Real=2.0) + lock(F) do show_umf_info(F.control, F.info, level) - finally - unlock(F) end end @@ -310,12 +312,14 @@ lu(A::Union{Adjoint{T, S}, Transpose{T, S}}; check::Bool = true) where {T<:UMFVT lu(copy(A); check) """ - lu!(F::UmfpackLU, A::SparseMatrixCSC; check=true) -> F::UmfpackLU + lu!(F::UmfpackLU, A::SparseMatrixCSC; check=true, reuse_symbolic=true) -> F::UmfpackLU Compute the LU factorization of a sparse matrix `A`, reusing the symbolic -factorization of an already existing LU factorization stored in `F`. The -sparse matrix `A` must have an identical nonzero pattern as the matrix used -to create the LU factorization `F`, otherwise an error is thrown. +factorization of an already existing LU factorization stored in `F`. +Unless `reuse_symbolic` is set to false, the sparse matrix `A` must have an +identical nonzero pattern as the matrix used to create the LU factorization `F`, +otherwise an error is thrown. If the size of `A` and `F` differ, all vectors will +be resized accordingly. When `check = true`, an error is thrown if the decomposition fails. When `check = false`, responsibility for checking the decomposition's @@ -326,8 +330,8 @@ See also [`lu`](@ref) !!! note `lu!(F::UmfpackLU, A::SparseMatrixCSC)` uses the UMFPACK library that is part of SuiteSparse. As this library only supports sparse matrices with [`Float64`](@ref) or - `ComplexF64` elements, `lu!` converts `A` into a copy that is of type - `SparseMatrixCSC{Float64}` or `SparseMatrixCSC{ComplexF64}` as appropriate. + `ComplexF64` elements, `lu!` will automatically convert the types to those set by the LU + factorization or `SparseMatrixCSC{ComplexF64}` as appropriate. !!! compat "Julia 1.5" `lu!` for `UmfpackLU` requires at least Julia 1.5. @@ -348,7 +352,9 @@ julia> F \\ ones(2) 1.0 ``` """ -function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,Ti}; check::Bool=true) where {Ti<:UMFITypes} +function lu!(F::UmfpackLU, S::SparseMatrixCSC{Tv,Ti}; + check::Bool=true, reuse_symbolic::Bool=true) where {Tv,Ti} + zerobased = getcolptr(S)[1] == 0 F.m = size(S, 1) @@ -359,36 +365,30 @@ function lu!(F::UmfpackLU, S::SparseMatrixCSC{<:UMFVTypes,Ti}; check::Bool=true) resize!(F.colptr, length(getcolptr(S))) if zerobased - copy!(F.colptr, getcolptr(S)) + F.colptr .= getcolptr(S) else F.colptr .= getcolptr(S) .- one(Ti) end resize!(F.rowval, length(rowvals(S))) if zerobased - copy!(F.rowval, rowvals(S)) + F.rowval .= rowvals(S) else F.rowval .= rowvals(S) .- one(Ti) end resize!(F.nzval, length(nonzeros(S))) - copy!(F.nzval, nonzeros(S)) + F.nzval .= nonzeros(S) + if !reuse_symbolic && F.symbolic != C_NULL + umfpack_free_symbolic(F) + F.symbolic = C_NULL + end - umfpack_numeric!(F, reuse_numeric = false) + umfpack_numeric!(F, reuse_numeric=false) check && (issuccess(F) || throw(LinearAlgebra.SingularException(0))) return F end -lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{Float16,Float32},Ti}; - check::Bool = true) where {Ti<:UMFITypes} = - lu!(F, convert(SparseMatrixCSC{Float64,Ti}, A); check = check) -lu!(F::UmfpackLU, A::SparseMatrixCSC{<:Union{ComplexF16,ComplexF32},Ti}; - check::Bool = true) where {Ti<:UMFITypes} = - lu!(F, convert(SparseMatrixCSC{ComplexF64,Ti}, A); check = check) -lu!(F::UmfpackLU, A::Union{SparseMatrixCSC{T},SparseMatrixCSC{Complex{T}}}; - check::Bool = true) where {T<:AbstractFloat} = - throw(ArgumentError(string("matrix type ", typeof(A), "not supported."))) -lu!(F::UmfpackLU, A::SparseMatrixCSC; check::Bool = true) = lu!(F, float(A); check = check) size(F::UmfpackLU) = (F.m, F.n) function size(F::UmfpackLU, dim::Integer) @@ -480,66 +480,63 @@ for itype in UmfpackIndexTypes get_num_z = Symbol(umf_nm("get_numeric", :ComplexF64, itype)) @eval begin function umfpack_symbolic!(U::UmfpackLU{Float64,$itype}) - if U.symbolic != C_NULL return U end - lock(U) - try - tmp = Vector{Ptr{Cvoid}}(undef, 1) + U.symbolic != C_NULL && return U + + lock(U) do + tmp = Ref{Ptr{Cvoid}}() @isok $sym_r(U.m, U.n, U.colptr, U.rowval, U.nzval, tmp, U.control, U.info) - U.symbolic = tmp[1] - finally - unlock(U) + U.symbolic = tmp[] end return U end function umfpack_symbolic!(U::UmfpackLU{ComplexF64,$itype}) - if U.symbolic != C_NULL return U end - lock(U) - try - tmp = Vector{Ptr{Cvoid}}(undef, 1) + U.symbolic != C_NULL && return U + lock(U) do + tmp = Ref{Ptr{Cvoid}}() @isok $sym_c(U.m, U.n, U.colptr, U.rowval, real(U.nzval), imag(U.nzval), tmp, - U.control, U.info) - U.symbolic = tmp[1] - finally - unlock(U) + U.control, U.info) + U.symbolic = tmp[] end return U end - function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric = true) - lock(U) - try - if (reuse_numeric && U.numeric != C_NULL) return U end - if U.symbolic == C_NULL umfpack_symbolic!(U) end + function umfpack_numeric!(U::UmfpackLU{Float64,$itype}; reuse_numeric=true) + lock(U) do + if (reuse_numeric && U.numeric != C_NULL) + return U + end + if U.symbolic == C_NULL + umfpack_symbolic!(U) + end - tmp = Vector{Ptr{Cvoid}}(undef, 1) + tmp = Ref{Ptr{Cvoid}}() status = $num_r(U.colptr, U.rowval, U.nzval, U.symbolic, tmp, U.control, U.info) U.status = status if status != UMFPACK_WARNING_singular_matrix umferror(status) end U.numeric != C_NULL && umfpack_free_numeric(U) - U.numeric = tmp[1] - finally - unlock(U) + U.numeric = tmp[] end return U end - function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric = true) - lock(U) - try - if (reuse_numeric && U.numeric != C_NULL) return U end - if U.symbolic == C_NULL umfpack_symbolic!(U) end + function umfpack_numeric!(U::UmfpackLU{ComplexF64,$itype}; reuse_numeric=true) + lock(U) do + if (reuse_numeric && U.numeric != C_NULL) + return U + end + if U.symbolic == C_NULL + umfpack_symbolic!(U) + end - tmp = Vector{Ptr{Cvoid}}(undef, 1) + tmp = Ref{Ptr{Cvoid}}() status = $num_c(U.colptr, U.rowval, real(U.nzval), imag(U.nzval), U.symbolic, tmp, - U.control, U.info) + U.control, U.info) U.status = status if status != UMFPACK_WARNING_singular_matrix umferror(status) end U.numeric != C_NULL && umfpack_free_numeric(U) - U.numeric = tmp[1] - finally - unlock(U) + U.numeric = tmp[] end return U end @@ -554,16 +551,13 @@ for itype in UmfpackIndexTypes if size(lu, 2) > length(lu.workspace.Wi) throw(ArgumentError("Wi should be at least larger than `size(Af, 2)`")) end - lock(lu) - try + lock(lu) do umfpack_numeric!(lu) - (size(b,1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch()) + (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch()) @isok $wsol_r(typ, lu.colptr, lu.rowval, lu.nzval, - x, b, lu.numeric, lu.control, - lu.info, lu.workspace.Wi, lu.workspace.W) - finally - unlock(lu) + x, b, lu.numeric, lu.control, + lu.info, lu.workspace.Wi, lu.workspace.W) end return x end @@ -578,35 +572,26 @@ for itype in UmfpackIndexTypes if size(lu, 2) > length(lu.workspace.Wi) throw(ArgumentError("Wi should be at least larger than `size(Af, 2)`")) end - lock(lu) - try + lock(lu) do umfpack_numeric!(lu) (size(b, 1) == lu.m) && (size(b) == size(x)) || throw(DimensionMismatch()) @isok $wsol_c(typ, lu.colptr, lu.rowval, lu.nzval, C_NULL, x, C_NULL, b, - C_NULL, lu.numeric, lu.control, lu.info, lu.workspace.Wi, lu.workspace.W) - finally - unlock(lu) + C_NULL, lu.numeric, lu.control, lu.info, lu.workspace.Wi, lu.workspace.W) end return x end function det(lu::UmfpackLU{Float64,$itype}) mx = Ref{Float64}() - lock(lu) - try + lock(lu) do @isok $det_r(mx, C_NULL, lu.numeric, lu.info) - finally - unlock(lu) end mx[] end function det(lu::UmfpackLU{ComplexF64,$itype}) mx = Ref{Float64}() mz = Ref{Float64}() - lock(lu) - try + lock(lu) do @isok $det_z(mx, mz, C_NULL, lu.numeric, lu.info) - finally - unlock(lu) end complex(mx[], mz[]) end @@ -926,28 +911,21 @@ for Tv in (:Float64, :ComplexF64), Ti in UmfpackIndexTypes end function umfpack_report_symbolic(lu::UmfpackLU, level::Real=4.0) - lock(lu) - try + lock(lu) do umfpack_symbolic!(lu) old_prl::Float64 = lu.control[UMFPACK_PRL] lu.ctrol[UMFPACK_PRL] = Float64(level) @isok umfpack_dl_report_symbolic(lu.symbolic, lu.control) lu.control[UMFPACK_PRL] = old_prl - finally - unlock(lu) end end function umfpack_report_numeric(lu::UmfpackLU, level::Real=0.4) - lock(lu) - try + lock(lu) do old_prl::Float64 = lu.control[UMFPACK_PRL] lu.control[UMFPACK_PRL] = Float64(level) @isok umfpack_dl_report_numeric(num, lu.control) lu.control[UMFPACK_PRL] = old_prl - - finally - unlock(lu) end end