include("common.jl") using TensorOperations, KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR @enum Hamiltonian_backend cpu_tensor gpu_cutensor "A Hamiltonian that can be applied to a vector" struct Hamiltonian{T} d::Int n::Int N::Int L::T μ::T ∂1 # Matrix{Complex{T}} or Nothing K_diag # CuTensor{Complex{T}} or Nothing K_mixed # CuTensor{Complex{T}} or Nothing Vs # Array{Complex{T}} or CuArray{Complex{T}} hermitian::Bool mode::Hamiltonian_backend function Hamiltonian{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::Hamiltonian_backend) where {T<:Float} @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available" k = -N÷2:N÷2-1 Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image) hermitian = ϕ == 0.0 if mode == cpu_tensor ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k') return new{T}(d, n, N, L, μ, ∂1, nothing, nothing, Vs, hermitian, mode) elseif mode == gpu_cutensor K_partial = (exp(-im * ϕ) * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k') K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A']) K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B']) return new{T}(d, n, N, L, μ, nothing, K_diag, K_mixed, CuArray(Vs), hermitian, mode) end end end Base.size(H::Hamiltonian, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("Hamiltonian only has 2 dimesions")) Base.size(H::Hamiltonian)::Dims{2} = (size(H, 1), size(H, 2)) "Dimensions of a vector to which 'H' can be applied" vectorDims(H::Hamiltonian)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...) "Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend" function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float} #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match # apply V operator @. out = H.Vs * v # apply K opereator coeff = -1 / (2 * H.μ) coords = H.n - 1 nconList_v_template = -collect(1:H.d*(coords)) for dim = 1:H.d for coord1 = 1:coords for coord2 = 1:coord1 i1 = which_index(H.n, dim, coord1) i2 = which_index(H.n, dim, coord2) nconList_1 = [-i1, 1] nconList_2 = [-i2, 2] nconList_v = copy(nconList_v_template) if i1 == i2 nconList_2[1] = 1 else nconList_v[i1] = 1 end nconList_v[i2] = 2 v_new = @ncon((H.∂1, H.∂1, v), (nconList_1, nconList_2, nconList_v)) out = axpy!(coeff, v_new, out) end end end return out end "cuTENSOR contraction and accumulation (C = A * B + C)" function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, one(eltype(C)), C.data, C.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, CUTENSOR.CUTENSOR_OP_IDENTITY) return C end "Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend" function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float} #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match ctx = context() # apply V operator NVTX.@range "V" @. out = H.Vs * v synchronize(ctx) # apply K opereator coords = H.n - 1 inds_template = ('a' - 1) .+ collect(1:H.d*(coords)) v_t = CuTensor(v, copy(inds_template)) out_t = CuTensor(out, copy(inds_template)) for dim = 1:H.d for coord1 = 1:coords for coord2 = 1:coord1 i1 = which_index(H.n, dim, coord1) i2 = which_index(H.n, dim, coord2) @assert v_t.inds == inds_template "v indices permuted" if i1 == i2 @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted" H.K_diag.inds[1] = 'a' - 1 + i1 v_t.inds[i1] = 'A' #synchronize(ctx) NVTX.@range "K-diag" out_t = contract_accumulate!(out_t, H.K_diag, v_t) v_t.inds[i1] = 'a' - 1 + i1 else @assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted" H.K_mixed.inds[1] = 'a' - 1 + i1 H.K_mixed.inds[3] = 'a' - 1 + i2 # OPTIMIZE: A and B can be swapped v_t.inds[i1] = 'A' v_t.inds[i2] = 'B' #synchronize(ctx) NVTX.@range "K-mixed" out_t = contract_accumulate!(out_t, H.K_mixed, v_t) v_t.inds[i1] = 'a' - 1 + i1 v_t.inds[i2] = 'a' - 1 + i2 end end end end @assert out_t.inds == inds_template "out indices permuted" synchronize(ctx) return out_t.data end "Apply 'H' on 'v' and return the result" function (H::Hamiltonian)(v) out = similar(v) return mul!(out, H, v) end tolerance = 1e-6 "Wrapper for KrylovKit.eigsolve" function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float} if H.mode == cpu_tensor x₀ = rand(Complex{T}, vectorDims(H)...) elseif H.mode == gpu_cutensor x₀ = CUDA.rand(Complex{T}, vectorDims(H)...) synchronize() end evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance) resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances return evals, evecs, info end