152 lines
6.3 KiB
Julia
152 lines
6.3 KiB
Julia
include("common.jl")
|
|
using TensorOperations, KrylovKit, LinearAlgebra, CUDA, cuTENSOR, NVTX
|
|
|
|
@enum Hamiltonian_backend cpu_tensor gpu_cutensor
|
|
|
|
"A Hamiltonian that can be applied to a vector"
|
|
struct Hamiltonian{T}
|
|
s::system{T}
|
|
K_partial::Matrix{Complex{T}}
|
|
K_diag::Union{CuTensor{Complex{T}},Nothing}
|
|
K_mixed::Union{CuTensor{Complex{T}},Nothing}
|
|
K_partial_1::Union{Tuple,Nothing}
|
|
Vs::Union{Array{Complex{T}},CuArray{Complex{T}}}
|
|
hermitian::Bool
|
|
mode::Hamiltonian_backend
|
|
|
|
function Hamiltonian{T}(s::system{T}, V_twobody::Function, ϕ::Real, n_image::Int, mode::Hamiltonian_backend) where {T<:Float}
|
|
@assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
|
|
k = -s.N÷2:s.N÷2-1
|
|
hermitian = ϕ == 0.0
|
|
K_partial = (exp(-im * convert(T, ϕ)) * im / sqrt(2 * s.μ)) .* ∂_1DOF.(Ref(s), k, k')
|
|
K_partial_1 = sym_reduce(s, K_partial)
|
|
Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image)
|
|
if mode == gpu_cutensor
|
|
K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A'])
|
|
K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
|
|
Vs = CuArray(Vs)
|
|
else
|
|
K_diag = nothing
|
|
K_mixed = nothing
|
|
end
|
|
return new{T}(s, K_partial, K_diag, K_mixed, K_partial_1, Vs, hermitian, mode)
|
|
end
|
|
end
|
|
|
|
Base.size(H::Hamiltonian, i::Int)::Int = (i == 1 || i == 2) ? H.s.N^(H.s.d * (H.s.n - 2)) * length(H.s.unique_i) : throw(ArgumentError("Hamiltonian only has 2 dimesions"))
|
|
Base.size(H::Hamiltonian)::Dims{2} = (size(H, 1), size(H, 2))
|
|
|
|
"Dimensions of a vector to which 'H' can be applied"
|
|
vectorDims(H::Hamiltonian)::Dims = tuple(length(H.s.unique_i), fill(H.s.N, H.s.d * (H.s.n - 2))...)
|
|
|
|
"Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend"
|
|
function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
|
|
#LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
|
|
# apply V operator
|
|
@. out = H.Vs * v
|
|
# apply K opereator
|
|
nconList_v_template = -collect(1:(H.s.d * (H.s.n - 2) + 1))
|
|
for dim = 1:H.s.d
|
|
for coord1 = 1:(H.s.n - 1)
|
|
for coord2 = 1:coord1
|
|
i1 = which_index(H.s, dim, coord1)
|
|
i2 = which_index(H.s, dim, coord2)
|
|
nconList_1 = [-i1, 1]
|
|
nconList_2 = [-i2, 2]
|
|
nconList_v = copy(nconList_v_template)
|
|
if i1 == i2
|
|
nconList_2[1] = 1
|
|
else
|
|
nconList_v[i1] = 1
|
|
end
|
|
nconList_v[i2] = 2
|
|
|
|
tensor1 = coord1 == 1 ? H.K_partial_1[dim] : H.K_partial
|
|
tensor2 = coord2 == 1 ? H.K_partial_1[dim] : H.K_partial
|
|
v_new = @ncon((tensor1, tensor2, v), (nconList_1, nconList_2, nconList_v))
|
|
|
|
out = axpy!(1, v_new, out)
|
|
end
|
|
end
|
|
end
|
|
return out
|
|
end
|
|
|
|
"cuTENSOR contraction and accumulation (C = A * B + C)"
|
|
function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
|
|
CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY,
|
|
one(eltype(C)), C.data, C.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, CUTENSOR.CUTENSOR_OP_IDENTITY)
|
|
return C
|
|
end
|
|
|
|
"Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend"
|
|
function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
|
|
#LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
|
|
ctx = context()
|
|
# apply V operator
|
|
NVTX.@range "V" @. out = H.Vs * v
|
|
synchronize(ctx)
|
|
# apply K opereator
|
|
coords = H.s.n - 1
|
|
inds_template = ('a' - 1) .+ collect(1:H.s.d*(coords))
|
|
v_t = CuTensor(v, copy(inds_template))
|
|
out_t = CuTensor(out, copy(inds_template))
|
|
for dim = 1:H.s.d
|
|
for coord1 = 1:coords
|
|
for coord2 = 1:coord1
|
|
i1 = which_index(H.s, dim, coord1)
|
|
i2 = which_index(H.s, dim, coord2)
|
|
@assert v_t.inds == inds_template "v indices permuted"
|
|
if i1 == i2
|
|
@assert H.K_diag.inds[2] == 'A' "K_diag indices permuted"
|
|
H.K_diag.inds[1] = 'a' - 1 + i1
|
|
v_t.inds[i1] = 'A'
|
|
#synchronize(ctx)
|
|
NVTX.@range "K-diag" out_t = contract_accumulate!(out_t, H.K_diag, v_t)
|
|
v_t.inds[i1] = 'a' - 1 + i1
|
|
else
|
|
@assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted"
|
|
H.K_mixed.inds[1] = 'a' - 1 + i1
|
|
H.K_mixed.inds[3] = 'a' - 1 + i2
|
|
# OPTIMIZE: A and B can be swapped
|
|
v_t.inds[i1] = 'A'
|
|
v_t.inds[i2] = 'B'
|
|
#synchronize(ctx)
|
|
NVTX.@range "K-mixed" out_t = contract_accumulate!(out_t, H.K_mixed, v_t)
|
|
v_t.inds[i1] = 'a' - 1 + i1
|
|
v_t.inds[i2] = 'a' - 1 + i2
|
|
end
|
|
end
|
|
end
|
|
end
|
|
@assert out_t.inds == inds_template "out indices permuted"
|
|
synchronize(ctx)
|
|
return out_t.data
|
|
end
|
|
|
|
"Apply 'H' on 'v' and return the result"
|
|
function (H::Hamiltonian)(v)
|
|
out = similar(v)
|
|
return mul!(out, H, v)
|
|
end
|
|
|
|
tolerance = 1e-6
|
|
|
|
"Wrapper for KrylovKit.eigsolve"
|
|
function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector,Vector,KrylovKit.ConvergenceInfo} where {T<:Float}
|
|
if H.mode == cpu_tensor
|
|
x₀ = rand(Complex{T}, vectorDims(H)...)
|
|
elseif H.mode == gpu_cutensor
|
|
x₀ = CUDA.rand(Complex{T}, vectorDims(H)...)
|
|
synchronize()
|
|
end
|
|
evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance, krylovdim = levels * 4)
|
|
info.converged < levels && throw(error("Not enough convergence"))
|
|
if H.hermitian evals = real.(evals) end
|
|
if H.mode == gpu_cutensor # to avoid possible GPU memory leak
|
|
CUDA.reclaim()
|
|
GC.gc(true)
|
|
end
|
|
return evals, evecs, info
|
|
end
|