First Jacobi implementation

This commit is contained in:
ysyapa 2023-08-28 03:32:41 +00:00
parent b21242ad49
commit ad5bac2bf2
2 changed files with 63 additions and 82 deletions

View File

@ -6,27 +6,23 @@ using TensorOperations, KrylovKit, LinearAlgebra, CUDA, cuTENSOR, NVTX
"A Hamiltonian that can be applied to a vector" "A Hamiltonian that can be applied to a vector"
struct Hamiltonian{T} struct Hamiltonian{T}
s::system{T} s::system{T}
K_partial::Matrix{Complex{T}} K::Union{CuTensor{Complex{T}}, Matrix{Complex{T}}}
K_diag::Union{CuTensor{Complex{T}},Nothing}
K_mixed::Union{CuTensor{Complex{T}},Nothing}
Vs::Union{Array{Complex{T}}, CuArray{Complex{T}}} Vs::Union{Array{Complex{T}}, CuArray{Complex{T}}}
hermitian::Bool hermitian::Bool
mode::Hamiltonian_backend mode::Hamiltonian_backend
function Hamiltonian{T}(s::system{T}, V_twobody::Function, ϕ::Real, n_image::Int, mode::Hamiltonian_backend) where {T<:Float} function Hamiltonian{T}(s::system{T}, V_twobody::Function, ϕ::Real, n_image::Int, mode::Hamiltonian_backend) where {T<:Float}
@assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available" @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
k = -s.N÷2:s.N÷2-1
Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image)
hermitian = ϕ == 0.0 hermitian = ϕ == 0.0
K_partial = (exp(-im * convert(T, ϕ)) * im / sqrt(2 * s.μ)) .* ∂_1DOF.(Ref(s), k, k') Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image)
K_diag = nothing k = -s.N÷2:s.N÷2-1
K_mixed = nothing = ∂_1DOF.(Ref(s), k, k')
K = exp(-2im * convert(T, ϕ)) .* ( * ) # TODO: Calculate K matrix elements directly
if mode == gpu_cutensor if mode == gpu_cutensor
K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A']) K = CuTensor(K, ['a', 'A'])
K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
Vs = CuArray(Vs) Vs = CuArray(Vs)
end end
return new{T}(s, K_partial, K_diag, K_mixed, Vs, hermitian, mode) return new{T}(s, K, Vs, hermitian, mode)
end end
end end
@ -45,31 +41,23 @@ function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{
coords = H.s.n - 1 coords = H.s.n - 1
nconList_v_template = -collect(1:H.s.d*(coords)) nconList_v_template = -collect(1:H.s.d*(coords))
for dim = 1:H.s.d for dim = 1:H.s.d
for coord1 = 1:coords for coord = 1:coords
for coord2 = 1:coord1 i = which_index(H.s, dim, coord)
i1 = which_index(H.s, dim, coord1) nconList_K = [-i, 1]
i2 = which_index(H.s, dim, coord2)
nconList_1 = [-i1, 1]
nconList_2 = [-i2, 2]
nconList_v = copy(nconList_v_template) nconList_v = copy(nconList_v_template)
if i1 == i2 nconList_v[i] = 1
nconList_2[1] = 1 v_new = @ncon((H.K, v), (nconList_K, nconList_v))
else coeff = -1 / (2 * H.s.μs[coord])
nconList_v[i1] = 1 out = axpy!(coeff, v_new, out)
end
nconList_v[i2] = 2
v_new = @ncon((H.K_partial, H.K_partial, v), (nconList_1, nconList_2, nconList_v))
out = axpy!(1, v_new, out)
end
end end
end end
return out return out
end end
"cuTENSOR contraction and accumulation (C = A * B + C)" "cuTENSOR contraction and accumulation (C = A * B + C)"
function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor function contract_accumulate!(alpha::Numer, C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, cuTENSOR.contraction!(alpha, A.data, A.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, cuTENSOR.CUTENSOR_OP_IDENTITY,
one(eltype(C)), C.data, C.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, CUTENSOR.CUTENSOR_OP_IDENTITY) one(eltype(C)), C.data, C.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, cuTENSOR.CUTENSOR_OP_IDENTITY)
return C return C
end end
@ -86,31 +74,16 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuAr
v_t = CuTensor(v, copy(inds_template)) v_t = CuTensor(v, copy(inds_template))
out_t = CuTensor(out, copy(inds_template)) out_t = CuTensor(out, copy(inds_template))
for dim = 1:H.s.d for dim = 1:H.s.d
for coord1 = 1:coords for coord = 1:coords
for coord2 = 1:coord1 i = which_index(H.s, dim, coord)
i1 = which_index(H.s, dim, coord1)
i2 = which_index(H.s, dim, coord2)
@assert v_t.inds == inds_template "v indices permuted" @assert v_t.inds == inds_template "v indices permuted"
if i1 == i2
@assert H.K_diag.inds[2] == 'A' "K_diag indices permuted" @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted"
H.K_diag.inds[1] = 'a' - 1 + i1 H.K.inds[1] = 'a' - 1 + i
v_t.inds[i1] = 'A' v_t.inds[i] = 'A'
#synchronize(ctx) #synchronize(ctx)
NVTX.@range "K-diag" out_t = contract_accumulate!(out_t, H.K_diag, v_t) coeff = -1 / (2 * H.s.μs[coord])
v_t.inds[i1] = 'a' - 1 + i1 NVTX.@range "K" out_t = contract_accumulate!(coeff, out_t, H.K, v_t)
else v_t.inds[i] = 'a' - 1 + i
@assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted"
H.K_mixed.inds[1] = 'a' - 1 + i1
H.K_mixed.inds[3] = 'a' - 1 + i2
# OPTIMIZE: A and B can be swapped
v_t.inds[i1] = 'A'
v_t.inds[i2] = 'B'
#synchronize(ctx)
NVTX.@range "K-mixed" out_t = contract_accumulate!(out_t, H.K_mixed, v_t)
v_t.inds[i1] = 'a' - 1 + i1
v_t.inds[i2] = 'a' - 1 + i2
end
end
end end
end end
@assert out_t.inds == inds_template "out indices permuted" @assert out_t.inds == inds_template "out indices permuted"

View File

@ -1,17 +1,32 @@
Float = Union{Float32,Float64} Float = Union{Float32,Float64}
norm_square(x) = sum(x .* x)
reducedMass(m1::Float, m2::Float) = 1 / (1/m1 + 1/m2)
"A few-body system defined by its physical parameters" "A few-body system defined by its physical parameters"
struct system{T} struct system{T}
d::Int d::Int
n::Int n::Int
N::Int N::Int
L::T L::T
μ::T μs::Vector{T}
invU::Matrix{T}
system{T}(d::Int, n::Int, N::Int, L::Real, μ::Real=0.5) where {T<:Float} = new{T}(d, n, N, convert(T, L), convert(T, μ)) function system{T}(d::Int, n::Int, N::Int, L::Real) where {T<:Float}
μs = collect(1:n) ./ collect(2:(n + 1))
U = zeros(T, n, n)
for coord in 1:n
U[coord, 1:coord] .= 1 / coord
if coord != n
U[coord, coord + 1] = -1
end end
end
invU = inv(U)[:, 1:(n - 1)]
norm_square(x::Array{Int})::Int = sum(x .* x) return new{T}(d, n, N, convert(T, L), μs, invU)
end
end
"Eq (46): Partial derivative matrix element for 1 degree of freedom" "Eq (46): Partial derivative matrix element for 1 degree of freedom"
function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float} function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float}
@ -22,19 +37,17 @@ function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float}
end end
end end
"Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' and particle 'p'?" "Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' of coordinate 'coord'?"
which_index(s::system, dim::Int, p::Int)::Int = (dim - 1) * (s.n - 1) + p which_index(s::system, dim::Int, coord::Int)::Int = (dim - 1) * (s.n - 1) + coord
"Δk (distance in terms of lattice paramter) between two particles along the given dimension" "Get the distance to the nearest image of the particle"
function get_Δk(s::system, i::CartesianIndex, dim::Int, p1::Int, p2::Int)::Int function nearest(s, Δk)
if p1 == p2 if Δk > s.N ÷ 2
return 0 return Δk - s.N
elseif p1 == s.n elseif Δk < -s.N ÷ 2
return -(i[which_index(s, dim, p2)] - s.N ÷ 2 - 1) return Δk + s.N
elseif p2 == s.n
return i[which_index(s, dim, p1)] - s.N ÷ 2 - 1
else else
return i[which_index(s, dim, p1)] - i[which_index(s, dim, p2)] return Δk
end end
end end
@ -44,21 +57,16 @@ function calculate_Vs(s::system{T}, V_twobody::Function, ϕ::T, n_image::Int)::A
images = collect.(Iterators.product(fill(-n_image:n_image, s.d)...)) # TODO: Learn how to use tuples instead of vectors images = collect.(Iterators.product(fill(-n_image:n_image, s.d)...)) # TODO: Learn how to use tuples instead of vectors
Vs = zeros(Complex{T}, fill(s.N, s.d * (s.n - 1))...) Vs = zeros(Complex{T}, fill(s.N, s.d * (s.n - 1))...)
Threads.@threads for i in CartesianIndices(Vs) Threads.@threads for i in CartesianIndices(Vs)
xs = reshape(collect(Tuple(i)), s.n - 1, s.d) .- (s.N ÷ 2 - 1)
rs = s.invU * xs
for p1 in 1:s.n for p1 in 1:s.n
for p2 in (p1 + 1):s.n for p2 in 1:(p1 - 1)
min_Δk = Array{Int}(undef, s.d) Δk = Array{T}(undef, s.d)
for dim in 1:s.d for dim in 1:s.d
Δk = get_Δk(s, i, dim, p1, p2) Δk[dim] = nearest(s, rs[p1, dim] - rs[p2, dim])
if Δk > s.N ÷ 2
min_Δk[dim] = Δk - s.N
elseif Δk < -s.N ÷ 2
min_Δk[dim] = Δk + s.N
else
min_Δk[dim] = Δk
end
end end
for image in images for image in images
Δk² = norm_square(min_Δk .- (s.N .* image)) Δk² = norm_square(Δk .- (s.N .* image))
Vs[i] += V_twobody(Δk² * coeff²) Vs[i] += V_twobody(Δk² * coeff²)
end end
end end