diff --git a/Hamiltonian.jl b/Hamiltonian.jl index a206677..c23942b 100644 --- a/Hamiltonian.jl +++ b/Hamiltonian.jl @@ -6,27 +6,23 @@ using TensorOperations, KrylovKit, LinearAlgebra, CUDA, cuTENSOR, NVTX "A Hamiltonian that can be applied to a vector" struct Hamiltonian{T} s::system{T} - K_partial::Matrix{Complex{T}} - K_diag::Union{CuTensor{Complex{T}},Nothing} - K_mixed::Union{CuTensor{Complex{T}},Nothing} - Vs::Union{Array{Complex{T}},CuArray{Complex{T}}} + K::Union{CuTensor{Complex{T}}, Matrix{Complex{T}}} + Vs::Union{Array{Complex{T}}, CuArray{Complex{T}}} hermitian::Bool mode::Hamiltonian_backend function Hamiltonian{T}(s::system{T}, V_twobody::Function, ϕ::Real, n_image::Int, mode::Hamiltonian_backend) where {T<:Float} @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available" - k = -s.N÷2:s.N÷2-1 - Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image) hermitian = ϕ == 0.0 - K_partial = (exp(-im * convert(T, ϕ)) * im / sqrt(2 * s.μ)) .* ∂_1DOF.(Ref(s), k, k') - K_diag = nothing - K_mixed = nothing + Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image) + k = -s.N÷2:s.N÷2-1 + ∂ = ∂_1DOF.(Ref(s), k, k') + K = exp(-2im * convert(T, ϕ)) .* (∂ * ∂) # TODO: Calculate K matrix elements directly if mode == gpu_cutensor - K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A']) - K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B']) + K = CuTensor(K, ['a', 'A']) Vs = CuArray(Vs) end - return new{T}(s, K_partial, K_diag, K_mixed, Vs, hermitian, mode) + return new{T}(s, K, Vs, hermitian, mode) end end @@ -45,31 +41,23 @@ function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{ coords = H.s.n - 1 nconList_v_template = -collect(1:H.s.d*(coords)) for dim = 1:H.s.d - for coord1 = 1:coords - for coord2 = 1:coord1 - i1 = which_index(H.s, dim, coord1) - i2 = which_index(H.s, dim, coord2) - nconList_1 = [-i1, 1] - nconList_2 = [-i2, 2] - nconList_v = copy(nconList_v_template) - if i1 == i2 - nconList_2[1] = 1 - else - nconList_v[i1] = 1 - end - nconList_v[i2] = 2 - v_new = @ncon((H.K_partial, H.K_partial, v), (nconList_1, nconList_2, nconList_v)) - out = axpy!(1, v_new, out) - end + for coord = 1:coords + i = which_index(H.s, dim, coord) + nconList_K = [-i, 1] + nconList_v = copy(nconList_v_template) + nconList_v[i] = 1 + v_new = @ncon((H.K, v), (nconList_K, nconList_v)) + coeff = -1 / (2 * H.s.μs[coord]) + out = axpy!(coeff, v_new, out) end end return out end "cuTENSOR contraction and accumulation (C = A * B + C)" -function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor - CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, - one(eltype(C)), C.data, C.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, CUTENSOR.CUTENSOR_OP_IDENTITY) +function contract_accumulate!(alpha::Numer, C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor + cuTENSOR.contraction!(alpha, A.data, A.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, + one(eltype(C)), C.data, C.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, cuTENSOR.CUTENSOR_OP_IDENTITY) return C end @@ -86,31 +74,16 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuAr v_t = CuTensor(v, copy(inds_template)) out_t = CuTensor(out, copy(inds_template)) for dim = 1:H.s.d - for coord1 = 1:coords - for coord2 = 1:coord1 - i1 = which_index(H.s, dim, coord1) - i2 = which_index(H.s, dim, coord2) - @assert v_t.inds == inds_template "v indices permuted" - if i1 == i2 - @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted" - H.K_diag.inds[1] = 'a' - 1 + i1 - v_t.inds[i1] = 'A' - #synchronize(ctx) - NVTX.@range "K-diag" out_t = contract_accumulate!(out_t, H.K_diag, v_t) - v_t.inds[i1] = 'a' - 1 + i1 - else - @assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted" - H.K_mixed.inds[1] = 'a' - 1 + i1 - H.K_mixed.inds[3] = 'a' - 1 + i2 - # OPTIMIZE: A and B can be swapped - v_t.inds[i1] = 'A' - v_t.inds[i2] = 'B' - #synchronize(ctx) - NVTX.@range "K-mixed" out_t = contract_accumulate!(out_t, H.K_mixed, v_t) - v_t.inds[i1] = 'a' - 1 + i1 - v_t.inds[i2] = 'a' - 1 + i2 - end - end + for coord = 1:coords + i = which_index(H.s, dim, coord) + @assert v_t.inds == inds_template "v indices permuted" + @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted" + H.K.inds[1] = 'a' - 1 + i + v_t.inds[i] = 'A' + #synchronize(ctx) + coeff = -1 / (2 * H.s.μs[coord]) + NVTX.@range "K" out_t = contract_accumulate!(coeff, out_t, H.K, v_t) + v_t.inds[i] = 'a' - 1 + i end end @assert out_t.inds == inds_template "out indices permuted" diff --git a/common.jl b/common.jl index aae050d..36f3d22 100644 --- a/common.jl +++ b/common.jl @@ -1,18 +1,33 @@ Float = Union{Float32,Float64} +norm_square(x) = sum(x .* x) +reducedMass(m1::Float, m2::Float) = 1 / (1/m1 + 1/m2) + "A few-body system defined by its physical parameters" struct system{T} d::Int n::Int N::Int L::T - μ::T + μs::Vector{T} + invU::Matrix{T} - system{T}(d::Int, n::Int, N::Int, L::Real, μ::Real=0.5) where {T<:Float} = new{T}(d, n, N, convert(T, L), convert(T, μ)) + function system{T}(d::Int, n::Int, N::Int, L::Real) where {T<:Float} + μs = collect(1:n) ./ collect(2:(n + 1)) + + U = zeros(T, n, n) + for coord in 1:n + U[coord, 1:coord] .= 1 / coord + if coord != n + U[coord, coord + 1] = -1 + end + end + invU = inv(U)[:, 1:(n - 1)] + + return new{T}(d, n, N, convert(T, L), μs, invU) + end end -norm_square(x::Array{Int})::Int = sum(x .* x) - "Eq (46): Partial derivative matrix element for 1 degree of freedom" function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float} if k == l @@ -22,19 +37,17 @@ function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float} end end -"Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' and particle 'p'?" -which_index(s::system, dim::Int, p::Int)::Int = (dim - 1) * (s.n - 1) + p +"Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' of coordinate 'coord'?" +which_index(s::system, dim::Int, coord::Int)::Int = (dim - 1) * (s.n - 1) + coord -"Δk (distance in terms of lattice paramter) between two particles along the given dimension" -function get_Δk(s::system, i::CartesianIndex, dim::Int, p1::Int, p2::Int)::Int - if p1 == p2 - return 0 - elseif p1 == s.n - return -(i[which_index(s, dim, p2)] - s.N ÷ 2 - 1) - elseif p2 == s.n - return i[which_index(s, dim, p1)] - s.N ÷ 2 - 1 +"Get the distance to the nearest image of the particle" +function nearest(s, Δk) + if Δk > s.N ÷ 2 + return Δk - s.N + elseif Δk < -s.N ÷ 2 + return Δk + s.N else - return i[which_index(s, dim, p1)] - i[which_index(s, dim, p2)] + return Δk end end @@ -44,21 +57,16 @@ function calculate_Vs(s::system{T}, V_twobody::Function, ϕ::T, n_image::Int)::A images = collect.(Iterators.product(fill(-n_image:n_image, s.d)...)) # TODO: Learn how to use tuples instead of vectors Vs = zeros(Complex{T}, fill(s.N, s.d * (s.n - 1))...) Threads.@threads for i in CartesianIndices(Vs) + xs = reshape(collect(Tuple(i)), s.n - 1, s.d) .- (s.N ÷ 2 - 1) + rs = s.invU * xs for p1 in 1:s.n - for p2 in (p1 + 1):s.n - min_Δk = Array{Int}(undef, s.d) + for p2 in 1:(p1 - 1) + Δk = Array{T}(undef, s.d) for dim in 1:s.d - Δk = get_Δk(s, i, dim, p1, p2) - if Δk > s.N ÷ 2 - min_Δk[dim] = Δk - s.N - elseif Δk < -s.N ÷ 2 - min_Δk[dim] = Δk + s.N - else - min_Δk[dim] = Δk - end + Δk[dim] = nearest(s, rs[p1, dim] - rs[p2, dim]) end for image in images - Δk² = norm_square(min_Δk .- (s.N .* image)) + Δk² = norm_square(Δk .- (s.N .* image)) Vs[i] += V_twobody(Δk² * coeff²) end end