Merge branch 'master' into debugging

This commit is contained in:
Nuwan Yapa 2023-04-07 22:32:50 -04:00
commit a9098b3d65
6 changed files with 206 additions and 186 deletions

72
CPU.jl
View File

@ -1,72 +0,0 @@
include("common.jl")
using TensorOperations, KrylovKit, LinearAlgebra
"A Hamiltonian that can be applied to a vector"
struct HOperator{T}
    d::Int                  # together with n fixes the number of tensor axes, d * (n - 1)
    n::Int
    N::Int                  # length of every tensor axis
    L::T                    # forwarded to ∂_1DOF and calculate_Vs
    μ::T                    # enters the kinetic prefactor -1 / (2μ) in mul!
    ∂1::Matrix{Complex{T}}  # ∂_1DOF evaluated on all index pairs of -N÷2:N÷2-1
    Vs::Array{T}            # output of calculate_Vs; multiplied elementwise onto v in mul!

    # Precompute the ∂_1DOF matrix and the Vs array once so that mul! only has to
    # contract and accumulate.
    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, μ::T, n_image::Int) where {T<:Float}
        modes = (-N ÷ 2):(N ÷ 2 - 1)
        deriv = [∂_1DOF(L, N, row, col) for row in modes, col in modes]
        potential = calculate_Vs(V_twobody, d, n, N, L, n_image)
        return new{T}(d, n, N, L, μ, deriv, potential)
    end
end
# Number of rows (i == 1) or columns (i == 2) of the operator seen as a square matrix:
# N^(d * (n - 1)). Any other `i` raises an ArgumentError.
function Base.size(H::HOperator, i::Int)::Int
    if i == 1 || i == 2
        return H.N^(H.d * (H.n - 1))
    end
    throw(ArgumentError("HOperator only has 2 dimensions"))
end
# Both dimensions at once; the two entries are always equal.
Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
"Dimensions of a vector to which H can be applied"
function vectorDims(H::HOperator)::Dims
    # d * (n - 1) axes, each of length N
    n_axes = H.d * (H.n - 1)
    return ntuple(_ -> H.N, n_axes)
end
"Apply H on v and store the result in out"
function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
    # LinearMaps.check_dim_mul(out, H, v) is deliberately not called --- dimensions don't match
    # V operator: elementwise product with the stored Vs, overwriting `out`
    out .= H.Vs .* v
    # K operator: accumulate -1/(2μ) * (∂1 ∂1 v) for every axis pair produced below
    prefactor = -1 / (2 * H.μ)
    n_rel = H.n - 1
    # by default every axis of v carries an open (negative) ncon label
    open_labels = -collect(1:H.d*n_rel)
    for dim in 1:H.d, c1 in 1:n_rel, c2 in 1:c1
        ia = which_index(H.n, dim, c1)
        ib = which_index(H.n, dim, c2)
        labels_v = copy(open_labels)
        labels_v[ib] = 2
        if ia == ib
            # same axis: chain both matrices, ∂1[-ia, 1] * ∂1[1, 2] * v[.., 2, ..]
            labels_a = [-ia, 1]
            labels_b = [1, 2]
        else
            # distinct axes: one matrix each, ∂1[-ia, 1] * ∂1[-ib, 2] * v[.., 1, .., 2, ..]
            labels_a = [-ia, 1]
            labels_b = [-ib, 2]
            labels_v[ia] = 1
        end
        term = @ncon((H.∂1, H.∂1, v), (labels_a, labels_b, labels_v))
        axpy!(prefactor, term, out)
    end
    return out
end
"Apply H on v and return the result"
function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
    # fresh buffer with the shape and element type of v; mul! fills it
    return mul!(similar(v), H, v)
end
# Default convergence tolerance handed to KrylovKit.eigsolve by `eig`.
tolerance = 1e-6
"""
    eig(H::HOperator, levels::Int; tol = tolerance)

Wrapper for `KrylovKit.eigsolve`. Requests `levels` eigenpairs of `H` with the `:SR`
selector, treating `H` as Hermitian, starting from a random complex vector of shape
`vectorDims(H)`.

# Keywords
- `tol`: tolerance forwarded to `eigsolve`; defaults to the global `tolerance`.

# Returns
`(evals, evecs, info)` where `evals` are the real parts of the eigenvalues returned by
`eigsolve`, and `evecs` and `info` are passed through unchanged.

# Throws
`ErrorException` if fewer than `levels` eigenpairs converged.
"""
function eig(H::HOperator{T}, levels::Int; tol = tolerance)::Tuple{Vector{T},Any,Any} where {T<:Float}
    x₀ = rand(Complex{T}, vectorDims(H))
    evals, evecs, info = eigsolve(H, x₀, levels, :SR; ishermitian = true, tol = tol)
    # `error` already throws; no outer `throw` is needed
    info.converged < levels && error("Not enough convergence")
    return real.(evals), evecs, info
end

97
GPU.jl
View File

@ -1,97 +0,0 @@
include("common.jl")
using KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
# Fail as soon as this file is loaded unless all three CUDA.jl availability checks pass.
@assert CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
"A Hamiltonian that can be applied to a vector"
struct HOperator{T}
    d::Int                         # together with n fixes the number of tensor axes, d * (n - 1)
    n::Int
    N::Int                         # length of every tensor axis
    K_diag::CuTensor{Complex{T}}   # K_partial * K_partial, labelled ['a', 'A']
    K_mixed::CuTensor{Complex{T}}  # product of two labelled K_partial tensors (a, A and b, B)
    Vs::CuArray{T}                 # output of calculate_Vs; multiplied elementwise onto v in mul!

    # Precompute the kinetic tensors and the Vs array once so that mul! only has to
    # relabel, contract and accumulate.
    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, μ::T, n_image::Int) where {T<:Float}
        modes = (-N ÷ 2):(N ÷ 2 - 1)
        scale = im / sqrt(2 * μ)
        half_K = scale .* [∂_1DOF(L, N, row, col) for row in modes, col in modes]
        diag_part = CuTensor(CuArray(half_K * half_K), ['a', 'A'])
        left_factor = CuTensor(CuArray(half_K), ['a', 'A'])
        right_factor = CuTensor(CuArray(half_K), ['b', 'B'])
        mixed_part = left_factor * right_factor
        potential = calculate_Vs(V_twobody, d, n, N, L, n_image)
        return new{T}(d, n, N, diag_part, mixed_part, potential)
    end
end
# Number of rows (i == 1) or columns (i == 2) of the operator seen as a square matrix:
# N^(d * (n - 1)). Any other `i` raises an ArgumentError.
function Base.size(H::HOperator, i::Int)::Int
    if i == 1 || i == 2
        return H.N^(H.d * (H.n - 1))
    end
    throw(ArgumentError("HOperator only has 2 dimensions"))
end
# Both dimensions at once; the two entries are always equal.
Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
"Dimensions of a vector to which H can be applied"
function vectorDims(H::HOperator)::Dims
    # d * (n - 1) axes, each of length N
    n_axes = H.d * (H.n - 1)
    return ntuple(_ -> H.N, n_axes)
end
"cuTENSOR contraction and accumulation (C = A * B + C)"
function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
    identity_op = CUTENSOR.CUTENSOR_OP_IDENTITY
    # the same unit scalar is passed in both scalar slots, giving C = A * B + C
    unit = one(eltype(C))
    CUTENSOR.contraction!(
        unit, A.data, A.inds, identity_op,
        B.data, B.inds, identity_op,
        unit, C.data, C.inds, identity_op,
        identity_op,
    )
    return C
end
"Apply H on v and store the result in out"
function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
    # LinearMaps.check_dim_mul(out, H, v) is deliberately not called --- dimensions don't match
    device_ctx = context()
    # V operator: elementwise product with the stored Vs, overwriting `out`
    NVTX.@range "V" @. out = H.Vs * v
    synchronize(device_ctx)
    # K operator: accumulate one contraction per axis pair produced below
    n_rel = H.n - 1
    axis_label(i) = 'a' - 1 + i   # axis i is labelled by the i-th lowercase letter
    default_labels = [axis_label(i) for i in 1:H.d*n_rel]
    v_t = CuTensor(v, copy(default_labels))
    acc_t = CuTensor(out, copy(default_labels))
    for dim in 1:H.d, c1 in 1:n_rel, c2 in 1:c1
        ia = which_index(H.n, dim, c1)
        ib = which_index(H.n, dim, c2)
        @assert v_t.inds == default_labels "v indices permuted"
        if ia == ib
            @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted"
            # point K_diag's output label at axis ia and contract over 'A'
            H.K_diag.inds[1] = axis_label(ia)
            v_t.inds[ia] = 'A'
            #synchronize(device_ctx)
            NVTX.@range "K-diag" acc_t = contract_accumulate!(acc_t, H.K_diag, v_t)
            # restore the label of v for the next iteration
            v_t.inds[ia] = axis_label(ia)
        else
            @assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted"
            # point K_mixed's output labels at axes ia and ib, contract over 'A' and 'B'
            H.K_mixed.inds[1] = axis_label(ia)
            H.K_mixed.inds[3] = axis_label(ib)
            # OPTIMIZE: A and B can be swapped
            v_t.inds[ia] = 'A'
            v_t.inds[ib] = 'B'
            #synchronize(device_ctx)
            NVTX.@range "K-mixed" acc_t = contract_accumulate!(acc_t, H.K_mixed, v_t)
            # restore the labels of v for the next iteration
            v_t.inds[ia] = axis_label(ia)
            v_t.inds[ib] = axis_label(ib)
        end
    end
    @assert acc_t.inds == default_labels "out indices permuted"
    synchronize(device_ctx)
    return acc_t.data
end
"Apply H on v and return the result"
function (H::HOperator{T})(v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
    # fresh buffer with the shape and element type of v; mul! fills it
    return mul!(similar(v), H, v)
end
# Default convergence tolerance handed to KrylovKit.eigsolve by `eig`.
tolerance = 1e-6
"""
    eig(H::HOperator, levels::Int; tol = tolerance)

Wrapper for `KrylovKit.eigsolve`. Requests `levels` eigenpairs of `H` with the `:SR`
selector, treating `H` as Hermitian, starting from a random complex `CUDA.rand` array
of shape `vectorDims(H)`.

# Keywords
- `tol`: tolerance forwarded to `eigsolve`; defaults to the global `tolerance`.

# Returns
`(evals, evecs, info)` where `evals` are the real parts of the eigenvalues returned by
`eigsolve`, and `evecs` and `info` are passed through unchanged.

# Throws
`ErrorException` if fewer than `levels` eigenpairs converged.
"""
function eig(H::HOperator{T}, levels::Int; tol = tolerance)::Tuple{Vector{T},Any,Any} where {T<:Float}
    # CUDA.rand takes the dimensions as separate arguments, hence the splat
    x₀ = CUDA.rand(Complex{T}, vectorDims(H)...)
    synchronize()
    evals, evecs, info = eigsolve(H, x₀, levels, :SR; ishermitian = true, tol = tol)
    # `error` already throws; no outer `throw` is needed
    info.converged < levels && error("Not enough convergence")
    return real.(evals), evecs, info
end

145
Hamiltonian.jl Normal file
View File

@ -0,0 +1,145 @@
include("common.jl")
using TensorOperations, KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
# Backends a Hamiltonian can be built for: `cpu_tensor` pairs with the Array-based mul!,
# `gpu_cutensor` with the CuArray-based mul!.
@enum Hamiltonian_backend cpu_tensor gpu_cutensor
"A Hamiltonian that can be applied to a vector"
struct Hamiltonian{T}
    d::Int    # together with n fixes the number of tensor axes, d * (n - 1)
    n::Int
    N::Int    # length of every tensor axis
    L::T      # forwarded to ∂_1DOF and calculate_Vs
    μ::T      # enters the kinetic prefactor
    ∂1        # Matrix{Complex{T}} or Nothing (only set for cpu_tensor)
    K_diag    # CuTensor{Complex{T}} or Nothing (only set for gpu_cutensor)
    K_mixed   # CuTensor{Complex{T}} or Nothing (only set for gpu_cutensor)
    Vs        # Array{Complex{T}} or CuArray{Complex{T}}
    hermitian::Bool              # true exactly when ϕ == 0
    mode::Hamiltonian_backend

    # Precompute Vs and the backend-specific kinetic data; fields belonging to the
    # other backend are stored as `nothing`.
    function Hamiltonian{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::Hamiltonian_backend) where {T<:Float}
        @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
        modes = (-N ÷ 2):(N ÷ 2 - 1)
        potential = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
        is_hermitian = iszero(ϕ)
        rotation = exp(-im * ϕ)   # phase applied to every ∂_1DOF entry
        if mode == cpu_tensor
            deriv = rotation .* ∂_1DOF.(L, N, modes, modes')
            return new{T}(d, n, N, L, μ, deriv, nothing, nothing, potential, is_hermitian, mode)
        elseif mode == gpu_cutensor
            half_K = (rotation * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, modes, modes')
            diag_part = CuTensor(CuArray(half_K * half_K), ['a', 'A'])
            left_factor = CuTensor(CuArray(half_K), ['a', 'A'])
            right_factor = CuTensor(CuArray(half_K), ['b', 'B'])
            mixed_part = left_factor * right_factor
            return new{T}(d, n, N, L, μ, nothing, diag_part, mixed_part, CuArray(potential), is_hermitian, mode)
        end
    end
end
# Number of rows (i == 1) or columns (i == 2) of the operator seen as a square matrix:
# N^(d * (n - 1)). Any other `i` raises an ArgumentError.
function Base.size(H::Hamiltonian, i::Int)::Int
    if i == 1 || i == 2
        return H.N^(H.d * (H.n - 1))
    end
    throw(ArgumentError("Hamiltonian only has 2 dimensions"))
end
# Both dimensions at once; the two entries are always equal.
Base.size(H::Hamiltonian)::Dims{2} = (size(H, 1), size(H, 2))
"Dimensions of a vector to which 'H' can be applied"
function vectorDims(H::Hamiltonian)::Dims
    # d * (n - 1) axes, each of length N
    n_axes = H.d * (H.n - 1)
    return ntuple(_ -> H.N, n_axes)
end
"Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend"
function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
    # LinearMaps.check_dim_mul(out, H, v) is deliberately not called --- dimensions don't match
    # V operator: elementwise product with the stored Vs, overwriting `out`
    out .= H.Vs .* v
    # K operator: accumulate -1/(2μ) * (∂1 ∂1 v) for every axis pair produced below
    prefactor = -1 / (2 * H.μ)
    n_rel = H.n - 1
    # by default every axis of v carries an open (negative) ncon label
    open_labels = -collect(1:H.d*n_rel)
    for dim in 1:H.d, c1 in 1:n_rel, c2 in 1:c1
        ia = which_index(H.n, dim, c1)
        ib = which_index(H.n, dim, c2)
        labels_v = copy(open_labels)
        labels_v[ib] = 2
        if ia == ib
            # same axis: chain both matrices, ∂1[-ia, 1] * ∂1[1, 2] * v[.., 2, ..]
            labels_a = [-ia, 1]
            labels_b = [1, 2]
        else
            # distinct axes: one matrix each, ∂1[-ia, 1] * ∂1[-ib, 2] * v[.., 1, .., 2, ..]
            labels_a = [-ia, 1]
            labels_b = [-ib, 2]
            labels_v[ia] = 1
        end
        term = @ncon((H.∂1, H.∂1, v), (labels_a, labels_b, labels_v))
        axpy!(prefactor, term, out)
    end
    return out
end
"cuTENSOR contraction and accumulation (C = A * B + C)"
function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
    identity_op = CUTENSOR.CUTENSOR_OP_IDENTITY
    # the same unit scalar is passed in both scalar slots, giving C = A * B + C
    unit = one(eltype(C))
    CUTENSOR.contraction!(
        unit, A.data, A.inds, identity_op,
        B.data, B.inds, identity_op,
        unit, C.data, C.inds, identity_op,
        identity_op,
    )
    return C
end
"Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend"
function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
    # LinearMaps.check_dim_mul(out, H, v) is deliberately not called --- dimensions don't match
    device_ctx = context()
    # V operator: elementwise product with the stored Vs, overwriting `out`
    NVTX.@range "V" @. out = H.Vs * v
    synchronize(device_ctx)
    # K operator: accumulate one contraction per axis pair produced below
    n_rel = H.n - 1
    axis_label(i) = 'a' - 1 + i   # axis i is labelled by the i-th lowercase letter
    default_labels = [axis_label(i) for i in 1:H.d*n_rel]
    v_t = CuTensor(v, copy(default_labels))
    acc_t = CuTensor(out, copy(default_labels))
    for dim in 1:H.d, c1 in 1:n_rel, c2 in 1:c1
        ia = which_index(H.n, dim, c1)
        ib = which_index(H.n, dim, c2)
        @assert v_t.inds == default_labels "v indices permuted"
        if ia == ib
            @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted"
            # point K_diag's output label at axis ia and contract over 'A'
            H.K_diag.inds[1] = axis_label(ia)
            v_t.inds[ia] = 'A'
            #synchronize(device_ctx)
            NVTX.@range "K-diag" acc_t = contract_accumulate!(acc_t, H.K_diag, v_t)
            # restore the label of v for the next iteration
            v_t.inds[ia] = axis_label(ia)
        else
            @assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted"
            # point K_mixed's output labels at axes ia and ib, contract over 'A' and 'B'
            H.K_mixed.inds[1] = axis_label(ia)
            H.K_mixed.inds[3] = axis_label(ib)
            # OPTIMIZE: A and B can be swapped
            v_t.inds[ia] = 'A'
            v_t.inds[ib] = 'B'
            #synchronize(device_ctx)
            NVTX.@range "K-mixed" acc_t = contract_accumulate!(acc_t, H.K_mixed, v_t)
            # restore the labels of v for the next iteration
            v_t.inds[ia] = axis_label(ia)
            v_t.inds[ib] = axis_label(ib)
        end
    end
    @assert acc_t.inds == default_labels "out indices permuted"
    synchronize(device_ctx)
    return acc_t.data
end
"Apply 'H' on 'v' and return the result"
function (H::Hamiltonian)(v)
    # fresh buffer with the shape and element type of v; the mul! method is chosen
    # by the array type of v
    return mul!(similar(v), H, v)
end
# Default convergence tolerance handed to KrylovKit.eigsolve by `eig`.
tolerance = 1e-6
"""
    eig(H::Hamiltonian, levels::Int; resonances = !H.hermitian, tol = tolerance)

Wrapper for `KrylovKit.eigsolve`. Requests `levels` eigenpairs of `H`, starting from a
random complex vector of shape `vectorDims(H)` created on the backend given by `H.mode`.

# Keywords
- `resonances`: when `true`, use the `:LI` selector and skip the convergence check;
  when `false`, use `:SR` and require `levels` converged eigenpairs.
  Defaults to `!H.hermitian`.
- `tol`: tolerance forwarded to `eigsolve`; defaults to the global `tolerance`.

# Returns
`(evals, evecs, info)` as returned by `eigsolve`, except that `evals` is reduced to its
real parts when `H.hermitian` is `true`.

# Throws
`ErrorException` if `resonances` is `false` and fewer than `levels` eigenpairs converged.
"""
function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian, tol = tolerance)::Tuple{Vector,Vector,KrylovKit.ConvergenceInfo} where {T<:Float}
    if H.mode == cpu_tensor
        x₀ = rand(Complex{T}, vectorDims(H)...)
    elseif H.mode == gpu_cutensor
        x₀ = CUDA.rand(Complex{T}, vectorDims(H)...)
        synchronize()
    end
    selector = resonances ? :LI : :SR
    evals, evecs, info = eigsolve(H, x₀, levels, selector; ishermitian = H.hermitian, tol = tol)
    # don't check convergence for resonances; written out explicitly instead of relying
    # on `&&` binding tighter than `||`
    if !resonances && info.converged < levels
        error("Not enough convergence")
    end
    if H.hermitian
        evals = real.(evals)
    end
    return evals, evecs, info
end

View File

@ -1,21 +1,21 @@
using CUDA include("Hamiltonian.jl")
GPU_mode = !("CPU" in ARGS) && CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() GPU_mode = !("CPU" in ARGS) && CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu()
println("Running with ",Threads.nthreads()," thread(s)") println("Running with ",Threads.nthreads()," thread(s)")
if GPU_mode if GPU_mode
include("GPU.jl") mode=gpu_cutensor
println("Available GPUs:") println("Available GPUs:")
print(" ") print(" ")
println.(name.(devices())) println.(name.(devices()))
else else
include("CPU.jl") mode=cpu_tensor
end end
T=Float32 T=Float32
function V_test(r2::T)::T function V_test(r2)
return -4*exp(-r2/4) return -4*exp(-r2/4)
end end
@ -31,7 +31,7 @@ n_image=1
for L::T in 5.0:14.0 for L::T in 5.0:14.0
println("Constructing H operator...") println("Constructing H operator...")
@time H=HOperator{T}(V_test,3,3,N,L,convert(T,μ),n_image) @time H=Hamiltonian{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image,mode)
println("Applying H 1000 times...") println("Applying H 1000 times...")
if GPU_mode if GPU_mode
v=CUDA.rand(Complex{T},vectorDims(H)...) v=CUDA.rand(Complex{T},vectorDims(H)...)

View File

@ -28,10 +28,10 @@ function get_Δk(n::Int, N::Int, i::CartesianIndex, dim::Int, p1::Int, p2::Int):
end end
"Calculate diagonal elements of the V matrix" "Calculate diagonal elements of the V matrix"
function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, n_image::Int)::Array{T} where {T<:Float} function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, n_image::Int)::Array{Complex{T}} where {T<:Float}
L²_over_N² = (L / N)^2 coeff² = (exp(im * ϕ) * L / N)^2
images = collect.(Iterators.product(fill(-n_image:n_image, d)...)) # TODO: Learn how to use tuples instead of vectors images = collect.(Iterators.product(fill(-n_image:n_image, d)...)) # TODO: Learn how to use tuples instead of vectors
Vs = zeros(T, fill(N, d * (n - 1))...) Vs = zeros(Complex{T}, fill(N, d * (n - 1))...)
Threads.@threads for i in CartesianIndices(Vs) Threads.@threads for i in CartesianIndices(Vs)
for p1 in 1:n for p1 in 1:n
for p2 in (p1 + 1):n for p2 in (p1 + 1):n
@ -48,7 +48,7 @@ function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, n_image
end end
for image in images for image in images
Δk² = norm_square(min_Δk .- (N .* image)) Δk² = norm_square(min_Δk .- (N .* image))
Vs[i] += V_twobody(Δk² * L²_over_N²) Vs[i] += V_twobody(Δk² * coeff²)
end end
end end
end end

View File

@ -6,26 +6,70 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor\n", "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
"\n", "include(\"Hamiltonian.jl\")\n",
"include(\"CPU.jl\") # using CPU mode\n", "mode = cpu_tensor # using CPU mode\n",
"T = Float32\n", "T = Float32 # single-precision mode"
"\n", ]
"V_gauss(r2::T)::T =\n", },
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"V_gauss(r2) =\n",
" -4 * exp(-r2 / 4)\n", " -4 * exp(-r2 / 4)\n",
"\n", "\n",
"d = 3\n", "d = 3\n",
"n = 3\n", "n = 3\n",
"N = 6\n", "N = 6\n",
"L::T = 12\n", "L::T = 12\n",
"mu::T = 0.5\n", "ϕ::T = 0.0\n",
"μ::T = 0.5\n",
"n_imag = 1\n", "n_imag = 1\n",
"\n", "\n",
"H = HOperator{T}(V_gauss, d, n, N, L, mu, n_imag)\n", "H = Hamiltonian{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
"@time evals, evecs, info = eig(H, 5)\n", "@time evals, evecs, info = eig(H, 5)\n",
"print(info.numops, \" operations : \")\n", "print(info.numops, \" operations : \")\n",
"println(evals)" "println(evals)"
] ]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"using Plots\n",
"\n",
"V_gauss(r2) =\n",
" -4 * exp(-r2 / 4)\n",
"\n",
"d = 3\n",
"n = 2\n",
"N = 32\n",
"L::T = 16\n",
"ϕ::T = 0.5\n",
"μ::T = 0.5\n",
"n_imag = 0\n",
"\n",
"H = Hamiltonian{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
"@time evals, evecs, info = eig(H, 20)\n",
"print(info.numops, \" operations : \")\n",
"print(evals)\n",
"\n",
"scatter(real.(evals), imag.(evals); legend=false)\n",
"xlabel!(\"Re E\")\n",
"ylabel!(\"Im E\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {