Better cache preallocation (performace degraded!)

This commit is contained in:
Nuwan Yapa 2024-12-11 20:45:59 -05:00
parent 63392d3bc3
commit 791485a391
4 changed files with 21 additions and 32 deletions

View File

@ -18,9 +18,6 @@ E_max = 40
println("No of threads = ", Threads.nthreads())
basis = ho_basis_2B(E_max, Λ)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
println("Basis size = ", basis.dim)
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)

View File

@ -19,9 +19,6 @@ E_max = 40
println("No of threads = ", Threads.nthreads())
basis = ho_basis_2B(E_max, Λ)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
println("Basis size = ", basis.dim)
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)

View File

@ -21,9 +21,6 @@ E_max = 40
println("No of threads = ", Threads.nthreads())
basis = ho_basis_2B(E_max, Λ)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
println("Basis size = ", basis.dim)
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)

View File

@ -32,6 +32,7 @@ end
"2-body HO basis"
struct ho_basis_2B
E_max::Int
Λ::Int
dim::Int # dimensionality of the basis
Es::Vector{Int}
@ -64,10 +65,22 @@ struct ho_basis_2B
end
end
return new(Λ, length(Es), Es, n1s, l1s, n2s, l2s)
return new(E_max, Λ, length(Es), Es, n1s, l1s, n2s, l2s)
end
end
"Number of possible distinct matrix elements for a given l."
function cache_size_l(E_max::Int, l::Int)::Int
n_max = (E_max - l) ÷ 2
return (n_max * (n_max + 1)) ÷ 2
end
"Number of possible distinct matrix elements for all l."
cache_size(E_max::Int)::Int = sum(l -> cache_size_l(E_max, l), 0 : E_max)
"Preallocation of cache for PE matrix elements."
prealloc_V_cache(E_max::Int, dtype::DataType=Float64) = LRU{Tuple{UInt8, UInt8, UInt8}, dtype}(maxsize=cache_size(E_max))
function V_numerical(V_of_r, l, n1, n2; μω_gen=1.0, atol=0, maxevals=10^7)
const_part = sqrt(μω_gen) * ho_basis_const(l, n1) * ho_basis_const(l, n2)
integrand(r) = ho_basis_func(l, n1, sqrt(μω_gen) * r) * ho_basis_func(l, n2, sqrt(μω_gen) * r) * V_of_r(r)
@ -96,7 +109,7 @@ end
"PE matrix for a single DOF. Set kron_deltas=[other quantum numbers] for other DOFs which the operator does not act on.
E.g. get_sp_V_matrix(n1s, l1s, kron_deltas=[n2s l2s])"
function get_sp_V_matrix(V_l, ns, ls, kron_deltas=[]; dtype=Float64, cache=LRU{Tuple{UInt8, UInt8, UInt8}, dtype}(maxsize=(1+maximum(ns))^2))
function get_sp_V_matrix(V_l, ns, ls, kron_deltas=[]; dtype=Float64, cache=prealloc_V_cache(maximum(ls), dtype))
mat = zeros(dtype, length(ns), length(ns))
Threads.@threads for idx in CartesianIndices(mat)
(i, j) = Tuple(idx)
@ -156,22 +169,14 @@ function get_jacobi_V_matrix(V_of_r, basis::ho_basis_2B, μ1ω1, μω_global; at
end
function get_jacobi_V1_matrix(V_of_r, basis::ho_basis_2B, μ1ω1; atol=10^-6, maxevals=10^5)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
V1_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μ1ω1, atol=atol, maxevals=maxevals)
V1_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
V1 = get_sp_V_matrix(V1_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V1_cache)
V1 = get_sp_V_matrix(V1_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64)
return V1
end
function get_jacobi_V2_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
function get_jacobi_V2_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
V_relative_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
V_relative_cache = prealloc_V_cache(basis.E_max, ComplexF64)
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_relative_cache) + get_sp_V_matrix(V_relative_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_relative_cache)
U = Moshinsky_transform(basis)
@ -194,12 +199,9 @@ function get_2p_p1p2_matrix(basis::ho_basis_2B, μ1ω1, μ2ω2; dtype=Float64)
return sparse(mat)
end
function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10^-6, maxevals=10^5)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10^-6, maxevals=10^5)
V_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω, atol=atol, maxevals=maxevals)
V_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
V_cache = prealloc_V_cache(basis.E_max, ComplexF64)
V1 = get_sp_V_matrix(V_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_cache)
V2 = get_sp_V_matrix(V_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_cache)
@ -210,13 +212,9 @@ function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10
end
function get_src_V12_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
V_relative_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64)
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_relative_cache)
U = Moshinsky_transform(basis)
V12 = U' * V_relative * U