Combine CPU.jl and GPU.jl

2023-04-06 23:54:23 -04:00 · 2023-04-06 23:54:23 -04:00 · 6bbf238f4a
parent af89ace4ec
commit 6bbf238f4a
4 changed files with 76 additions and 96 deletions
--- a/CPU.jl
+++ b/CPU.jl
@ -1,73 +0,0 @@
-include("common.jl")
-using TensorOperations, KrylovKit, LinearAlgebra
-
-"A Hamiltonian that can be applied to a vector"
-struct HOperator{T}
-    d::Int
-    n::Int
-    N::Int
-    L::T
-    μ::T
-    ∂1::Matrix{Complex{T}}
-    Vs::Array{Complex{T}}
-    hermitian::Bool
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
-        k = -N÷2:N÷2-1
-        ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k')
-        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
-        return new{T}(d, n, N, L, μ, ∂1, Vs, ϕ == 0.0)
-    end
-end
-
-Base.size(H::HOperator, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("HOperator only has 2 dimesions"))
-Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
-
-"Dimensions of a vector to which H can be applied"
-vectorDims(H::HOperator)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)
-
-"Apply H on v and store the result in out"
-function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
-    #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
-    # apply V operator
-    @. out = H.Vs * v
-    # apply K opereator
-    coeff = -1 / (2 * H.μ)
-    coords = H.n - 1
-    nconList_v_template = -collect(1:H.d*(coords))
-    for dim = 1:H.d
-        for coord1 = 1:coords
-            for coord2 = 1:coord1
-                i1 = which_index(H.n, dim, coord1)
-                i2 = which_index(H.n, dim, coord2)
-                nconList_1 = [-i1, 1]
-                nconList_2 = [-i2, 2]
-                nconList_v = copy(nconList_v_template)
-                if i1 == i2
-                    nconList_2[1] = 1
-                else
-                    nconList_v[i1] = 1
-                end
-                nconList_v[i2] = 2
-                v_new = @ncon((H.∂1, H.∂1, v), (nconList_1, nconList_2, nconList_v))
-                out = axpy!(coeff, v_new, out)
-            end
-        end
-    end
-    return out
-end
-
-"Apply H on v and return the result"
-function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
-    out = similar(v)
-    return mul!(out, H, v)
-end
-
-tolerance = 1e-6
-
-"Wrapper for KrylovKit.eigsolve"
-function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
-    x₀ = rand(Complex{T}, vectorDims(H))
-    evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
-    resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
-    return evals, evecs, info
-end
--- a/HOperator.jl
+++ b/HOperator.jl
@ -1,33 +1,75 @@
 include("common.jl")
-using KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
+using TensorOperations, KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR

-@assert CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
+@enum HOperator_backend cpu_tensor gpu_cutensor

 "A Hamiltonian that can be applied to a vector"
 struct HOperator{T}
    d::Int
    n::Int
    N::Int
-    K_diag::CuTensor{Complex{T}}
-    K_mixed::CuTensor{Complex{T}}
-    Vs::CuArray{Complex{T}}
+    L::T
+    μ::T
+    ∂1 # Matrix{Complex{T}} or Nothing
+    K_diag # CuTensor{Complex{T}} or Nothing
+    K_mixed # CuTensor{Complex{T}} or Nothing
+    Vs # Array{Complex{T}} or CuArray{Complex{T}}
    hermitian::Bool
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
+    mode::HOperator_backend
+    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::HOperator_backend) where {T<:Float}
+        @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
        k = -N÷2:N÷2-1
+        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
+        hermitian = ϕ == 0.0
+        if mode == cpu_tensor
+            ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k')
+            return new{T}(d, n, N, L, μ, ∂1, nothing, nothing, Vs, hermitian, mode)                
+        elseif mode == gpu_cutensor
            K_partial = (exp(-im * ϕ) * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k')
            K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A'])
            K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
-        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
-        return new{T}(d, n, N, K_diag, K_mixed, Vs, ϕ == 0.0)
+            return new{T}(d, n, N, L, μ, nothing, K_diag, K_mixed, CuArray(Vs), hermitian, mode)
+        end
    end
 end

 Base.size(H::HOperator, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("HOperator only has 2 dimesions"))
 Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))

-"Dimensions of a vector to which H can be applied"
+"Dimensions of a vector to which 'H' can be applied"
 vectorDims(H::HOperator)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)

+"Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend"
+function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
+    #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
+    # apply V operator
+    @. out = H.Vs * v
+    # apply K opereator
+    coeff = -1 / (2 * H.μ)
+    coords = H.n - 1
+    nconList_v_template = -collect(1:H.d*(coords))
+    for dim = 1:H.d
+        for coord1 = 1:coords
+            for coord2 = 1:coord1
+                i1 = which_index(H.n, dim, coord1)
+                i2 = which_index(H.n, dim, coord2)
+                nconList_1 = [-i1, 1]
+                nconList_2 = [-i2, 2]
+                nconList_v = copy(nconList_v_template)
+                if i1 == i2
+                    nconList_2[1] = 1
+                else
+                    nconList_v[i1] = 1
+                end
+                nconList_v[i2] = 2
+                v_new = @ncon((H.∂1, H.∂1, v), (nconList_1, nconList_2, nconList_v))
+                out = axpy!(coeff, v_new, out)
+            end
+        end
+    end
+    return out
+end
+
 "cuTENSOR contraction and accumulation (C = A * B + C)"
 function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
    CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY,
@ -35,7 +77,7 @@ function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
    return C
 end

-"Apply H on v and store the result in out"
+"Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend"
 function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
    #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
    ctx = context()
@ -80,7 +122,13 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArra
    return out_t.data
 end

-"Apply H on v and return the result"
+"Apply 'H' on 'v' and return the result using the 'cpu_tensor' backend"
+function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
+    out = similar(v)
+    return mul!(out, H, v)
+end
+
+"Apply 'H' on 'v' and return the result using the 'gpu_cutensor' backend"
 function (H::HOperator{T})(v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
    out = similar(v)
    return mul!(out, H, v)
@ -90,8 +138,12 @@ tolerance = 1e-6

 "Wrapper for KrylovKit.eigsolve"
 function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
-    x₀ = CUDA.rand(Complex{T}, vectorDims(H)...) # ... added
+    if H.mode == cpu_tensor
+        x₀ = rand(Complex{T}, vectorDims(H)...)
+    elseif H.mode == gpu_cutensor
+        x₀ = CUDA.rand(Complex{T}, vectorDims(H)...)
        synchronize()
+    end
    evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
    resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
    return evals, evecs, info
--- a/benchmark.jl
+++ b/benchmark.jl
@ -1,16 +1,16 @@
-using CUDA
+include("HOperator.jl")

 GPU_mode = !("CPU" in ARGS) && CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu()

 println("Running with ",Threads.nthreads()," thread(s)")

 if GPU_mode
-    include("GPU.jl")
+    mode=gpu_cutensor
    println("Available GPUs:")
    print("    ")
    println.(name.(devices()))
 else
-    include("CPU.jl")
+    mode=cpu_tensor
 end

 T=Float32
@ -31,7 +31,7 @@ n_image=1

 for L::T in 5.0:14.0
    println("Constructing H operator...")
-    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image)
+    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image,mode)
    println("Applying H 1000 times...")
    if GPU_mode
        v=CUDA.rand(Complex{T},vectorDims(H)...)
--- a/example.ipynb
+++ b/example.ipynb
@ -6,8 +6,9 @@
   "metadata": {},
   "outputs": [],
   "source": [
-    "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor\n",
-    "include(\"CPU.jl\") # using CPU mode\n",
+    "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
+    "include(\"HOperator.jl\")\n",
+    "mode = cpu_tensor # using CPU mode\n",
    "T = Float32 # single-precision mode"
   ]
  },
@ -28,7 +29,7 @@
    "μ::T = 0.5\n",
    "n_imag = 1\n",
    "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
    "@time evals, evecs, info = eig(H, 5)\n",
    "print(info.numops, \" operations : \")\n",
    "println(evals)"
@ -53,7 +54,7 @@
    "μ::T = 0.5\n",
    "n_imag = 0\n",
    "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
    "@time evals, evecs, info = eig(H, 20)\n",
    "print(info.numops, \" operations : \")\n",
    "print(evals)\n",