Better cache preallocation (performace degraded!)
This commit is contained in:
parent
63392d3bc3
commit
791485a391
|
|
@ -18,9 +18,6 @@ E_max = 40
|
||||||
println("No of threads = ", Threads.nthreads())
|
println("No of threads = ", Threads.nthreads())
|
||||||
|
|
||||||
basis = ho_basis_2B(E_max, Λ)
|
basis = ho_basis_2B(E_max, Λ)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
println("Basis size = ", basis.dim)
|
println("Basis size = ", basis.dim)
|
||||||
|
|
||||||
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
||||||
|
|
|
||||||
|
|
@ -19,9 +19,6 @@ E_max = 40
|
||||||
println("No of threads = ", Threads.nthreads())
|
println("No of threads = ", Threads.nthreads())
|
||||||
|
|
||||||
basis = ho_basis_2B(E_max, Λ)
|
basis = ho_basis_2B(E_max, Λ)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
println("Basis size = ", basis.dim)
|
println("Basis size = ", basis.dim)
|
||||||
|
|
||||||
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
||||||
|
|
|
||||||
|
|
@ -21,9 +21,6 @@ E_max = 40
|
||||||
println("No of threads = ", Threads.nthreads())
|
println("No of threads = ", Threads.nthreads())
|
||||||
|
|
||||||
basis = ho_basis_2B(E_max, Λ)
|
basis = ho_basis_2B(E_max, Λ)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
println("Basis size = ", basis.dim)
|
println("Basis size = ", basis.dim)
|
||||||
|
|
||||||
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
@time "T1" T1 = get_sp_T_matrix(basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; μω_gen=μ1ω1, μ=μ1)
|
||||||
|
|
|
||||||
40
ho_basis.jl
40
ho_basis.jl
|
|
@ -32,6 +32,7 @@ end
|
||||||
|
|
||||||
"2-body HO basis"
|
"2-body HO basis"
|
||||||
struct ho_basis_2B
|
struct ho_basis_2B
|
||||||
|
E_max::Int
|
||||||
Λ::Int
|
Λ::Int
|
||||||
dim::Int # dimensionality of the basis
|
dim::Int # dimensionality of the basis
|
||||||
Es::Vector{Int}
|
Es::Vector{Int}
|
||||||
|
|
@ -64,10 +65,22 @@ struct ho_basis_2B
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
return new(Λ, length(Es), Es, n1s, l1s, n2s, l2s)
|
return new(E_max, Λ, length(Es), Es, n1s, l1s, n2s, l2s)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
"Number of possible distinct matrix elements for a given l."
|
||||||
|
function cache_size_l(E_max::Int, l::Int)::Int
|
||||||
|
n_max = (E_max - l) ÷ 2
|
||||||
|
return (n_max * (n_max + 1)) ÷ 2
|
||||||
|
end
|
||||||
|
|
||||||
|
"Number of possible distinct matrix elements for all l."
|
||||||
|
cache_size(E_max::Int)::Int = sum(l -> cache_size_l(E_max, l), 0 : E_max)
|
||||||
|
|
||||||
|
"Preallocation of cache for PE matrix elements."
|
||||||
|
prealloc_V_cache(E_max::Int, dtype::DataType=Float64) = LRU{Tuple{UInt8, UInt8, UInt8}, dtype}(maxsize=cache_size(E_max))
|
||||||
|
|
||||||
function V_numerical(V_of_r, l, n1, n2; μω_gen=1.0, atol=0, maxevals=10^7)
|
function V_numerical(V_of_r, l, n1, n2; μω_gen=1.0, atol=0, maxevals=10^7)
|
||||||
const_part = sqrt(μω_gen) * ho_basis_const(l, n1) * ho_basis_const(l, n2)
|
const_part = sqrt(μω_gen) * ho_basis_const(l, n1) * ho_basis_const(l, n2)
|
||||||
integrand(r) = ho_basis_func(l, n1, sqrt(μω_gen) * r) * ho_basis_func(l, n2, sqrt(μω_gen) * r) * V_of_r(r)
|
integrand(r) = ho_basis_func(l, n1, sqrt(μω_gen) * r) * ho_basis_func(l, n2, sqrt(μω_gen) * r) * V_of_r(r)
|
||||||
|
|
@ -96,7 +109,7 @@ end
|
||||||
|
|
||||||
"PE matrix for a single DOF. Set kron_deltas=[other quantum numbers] for other DOFs which the operator does not act on.
|
"PE matrix for a single DOF. Set kron_deltas=[other quantum numbers] for other DOFs which the operator does not act on.
|
||||||
E.g. get_sp_V_matrix(n1s, l1s, kron_deltas=[n2s l2s])"
|
E.g. get_sp_V_matrix(n1s, l1s, kron_deltas=[n2s l2s])"
|
||||||
function get_sp_V_matrix(V_l, ns, ls, kron_deltas=[]; dtype=Float64, cache=LRU{Tuple{UInt8, UInt8, UInt8}, dtype}(maxsize=(1+maximum(ns))^2))
|
function get_sp_V_matrix(V_l, ns, ls, kron_deltas=[]; dtype=Float64, cache=prealloc_V_cache(maximum(ls), dtype))
|
||||||
mat = zeros(dtype, length(ns), length(ns))
|
mat = zeros(dtype, length(ns), length(ns))
|
||||||
Threads.@threads for idx in CartesianIndices(mat)
|
Threads.@threads for idx in CartesianIndices(mat)
|
||||||
(i, j) = Tuple(idx)
|
(i, j) = Tuple(idx)
|
||||||
|
|
@ -156,22 +169,14 @@ function get_jacobi_V_matrix(V_of_r, basis::ho_basis_2B, μ1ω1, μω_global; at
|
||||||
end
|
end
|
||||||
|
|
||||||
function get_jacobi_V1_matrix(V_of_r, basis::ho_basis_2B, μ1ω1; atol=10^-6, maxevals=10^5)
|
function get_jacobi_V1_matrix(V_of_r, basis::ho_basis_2B, μ1ω1; atol=10^-6, maxevals=10^5)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
V1_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μ1ω1, atol=atol, maxevals=maxevals)
|
V1_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μ1ω1, atol=atol, maxevals=maxevals)
|
||||||
V1_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
|
V1 = get_sp_V_matrix(V1_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64)
|
||||||
V1 = get_sp_V_matrix(V1_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V1_cache)
|
|
||||||
|
|
||||||
return V1
|
return V1
|
||||||
end
|
end
|
||||||
|
|
||||||
function get_jacobi_V2_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
|
function get_jacobi_V2_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
|
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
|
||||||
V_relative_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
|
V_relative_cache = prealloc_V_cache(basis.E_max, ComplexF64)
|
||||||
|
|
||||||
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_relative_cache) + get_sp_V_matrix(V_relative_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_relative_cache)
|
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_relative_cache) + get_sp_V_matrix(V_relative_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_relative_cache)
|
||||||
U = Moshinsky_transform(basis)
|
U = Moshinsky_transform(basis)
|
||||||
|
|
@ -195,11 +200,8 @@ function get_2p_p1p2_matrix(basis::ho_basis_2B, μ1ω1, μ2ω2; dtype=Float64)
|
||||||
end
|
end
|
||||||
|
|
||||||
function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10^-6, maxevals=10^5)
|
function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10^-6, maxevals=10^5)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
V_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω, atol=atol, maxevals=maxevals)
|
V_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω, atol=atol, maxevals=maxevals)
|
||||||
V_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
|
V_cache = prealloc_V_cache(basis.E_max, ComplexF64)
|
||||||
|
|
||||||
V1 = get_sp_V_matrix(V_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_cache)
|
V1 = get_sp_V_matrix(V_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_cache)
|
||||||
V2 = get_sp_V_matrix(V_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_cache)
|
V2 = get_sp_V_matrix(V_elem, basis.n2s, basis.l2s, [basis.n1s, basis.l1s]; dtype=ComplexF64, cache=V_cache)
|
||||||
|
|
@ -210,13 +212,9 @@ function get_src_V_matrix(V_of_r, basis::ho_basis_2B, μω, μω_global; atol=10
|
||||||
end
|
end
|
||||||
|
|
||||||
function get_src_V12_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
|
function get_src_V12_matrix(V_of_r, basis::ho_basis_2B, μω_global; atol=10^-6, maxevals=10^5)
|
||||||
l_max = max(maximum(basis.l1s), maximum(basis.l2s))
|
|
||||||
n_max = max(maximum(basis.n1s), maximum(basis.n2s))
|
|
||||||
|
|
||||||
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
|
V_relative_elem(l, n1, n2) = V_numerical(V_of_r, l, n1, n2; μω_gen=μω_global, atol=atol, maxevals=maxevals)
|
||||||
V_relative_cache = LRU{Tuple{UInt8, UInt8, UInt8}, ComplexF64}(maxsize=(1+l_max)*(1+n_max)^2)
|
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64)
|
||||||
|
|
||||||
V_relative = get_sp_V_matrix(V_relative_elem, basis.n1s, basis.l1s, [basis.n2s, basis.l2s]; dtype=ComplexF64, cache=V_relative_cache)
|
|
||||||
U = Moshinsky_transform(basis)
|
U = Moshinsky_transform(basis)
|
||||||
V12 = U' * V_relative * U
|
V12 = U' * V_relative * U
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue