From 153fb770c3b1081e197e965cc49f39622401ba0b Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Fri, 31 Mar 2023 07:38:38 -0400
Subject: [PATCH 1/8] First working implementation

---
 CPU.jl        | 19 +++++++++---------
 GPU.jl        | 19 +++++++++---------
 common.jl     |  8 ++++----
 example.ipynb | 53 ++++++++++++++++++++++++++++++++++++++++++++++-----
 4 files changed, 72 insertions(+), 27 deletions(-)

diff --git a/CPU.jl b/CPU.jl
index 332cf19..254e8b5 100644
--- a/CPU.jl
+++ b/CPU.jl
@@ -9,12 +9,13 @@ struct HOperator{T}
     L::T
     μ::T
     ∂1::Matrix{Complex{T}}
-    Vs::Array{T}
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, μ::T, n_image::Int) where {T<:Float}
+    Vs::Array{Complex{T}}
+    hermitian::Bool
+    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
         k = -N÷2:N÷2-1
-        ∂1 = ∂_1DOF.(L, N, k, k')
-        Vs = calculate_Vs(V_twobody, d, n, N, L, n_image)
-        return new{T}(d, n, N, L, μ, ∂1, Vs)
+        ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k')
+        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
+        return new{T}(d, n, N, L, μ, ∂1, Vs, ϕ == 0.0)
     end
 end
 
@@ -64,9 +65,9 @@ end
 tolerance = 1e-6
 
 "Wrapper for KrylovKit.eigsolve"
-function eig(H::HOperator{T}, levels::Int)::Tuple{Vector{T},Any,Any} where {T<:Float}
+function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
     x₀ = rand(Complex{T}, vectorDims(H))
-    evals, evecs, info = eigsolve(H, x₀, levels, :SR; ishermitian = true, tol = tolerance)
-    info.converged < levels && throw(error("Not enough convergence"))
-    return real.(evals), evecs, info
+    evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
+    resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
+    return evals, evecs, info
 end
diff --git a/GPU.jl b/GPU.jl
index 8e731d4..7363265 100644
--- a/GPU.jl
+++ b/GPU.jl
@@ -10,14 +10,15 @@ struct HOperator{T}
     N::Int
     K_diag::CuTensor{Complex{T}}
     K_mixed::CuTensor{Complex{T}}
-    Vs::CuArray{T}
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, μ::T, n_image::Int) where {T<:Float}
+    Vs::CuArray{Complex{T}}
+    hermitian::Bool
+    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
         k = -N÷2:N÷2-1
-        K_partial = (im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k')
+        K_partial = (exp(-im * ϕ) * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k')
         K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A'])
         K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
-        Vs = calculate_Vs(V_twobody, d, n, N, L, n_image)
-        return new{T}(d, n, N, K_diag, K_mixed, Vs)
+        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
+        return new{T}(d, n, N, K_diag, K_mixed, Vs, ϕ == 0.0)
     end
 end
 
@@ -88,10 +89,10 @@ end
 tolerance = 1e-6
 
 "Wrapper for KrylovKit.eigsolve"
-function eig(H::HOperator{T}, levels::Int)::Tuple{Vector{T},Any,Any} where {T<:Float}
+function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
     x₀ = CUDA.rand(Complex{T}, vectorDims(H)...) # ... added
     synchronize()
-    evals, evecs, info = eigsolve(H, x₀, levels, :SR; ishermitian = true, tol = tolerance)
-    info.converged < levels && throw(error("Not enough convergence"))
-    return real.(evals), evecs, info
+    evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
+    resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
+    return evals, evecs, info
 end
diff --git a/common.jl b/common.jl
index 6a63689..a7ab441 100644
--- a/common.jl
+++ b/common.jl
@@ -28,10 +28,10 @@ function get_Δk(n::Int, N::Int, i::CartesianIndex, dim::Int, p1::Int, p2::Int):
 end
 
 "Calculate diagonal elements of the V matrix"
-function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, n_image::Int)::Array{T} where {T<:Float}
-    L²_over_N² = (L / N)^2
+function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, n_image::Int)::Array{Complex{T}} where {T<:Float}
+    coeff² = (exp(im * ϕ) * L / N)^2
     images = collect.(Iterators.product(fill(-n_image:n_image, d)...)) # TODO: Learn how to use tuples instead of vectors
-    Vs = zeros(T, fill(N, d * (n - 1))...)
+    Vs = zeros(Complex{T}, fill(N, d * (n - 1))...)
     Threads.@threads for i in CartesianIndices(Vs)
         for p1 in 1:n
             for p2 in (p1 + 1):n
@@ -48,7 +48,7 @@ function calculate_Vs(V_twobody::Function, d::Int, n::Int, N::Int, L::T, n_image
                 end
                 for image in images
                     Δk² = norm_square(min_Δk .- (N .* image))
-                    Vs[i] += V_twobody(Δk² * L²_over_N²)
+                    Vs[i] += V_twobody(Δk² * coeff²)
                 end
             end
         end
diff --git a/example.ipynb b/example.ipynb
index ad9cd64..374ba65 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -7,25 +7,68 @@
    "outputs": [],
    "source": [
     "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor\n",
-    "\n",
     "include(\"CPU.jl\") # using CPU mode\n",
-    "T = Float32\n",
-    "\n",
-    "V_gauss(r2::T)::T =\n",
+    "T = Float32 # single-precision mode"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "V_gauss(r2) =\n",
     "    -4 * exp(-r2 / 4)\n",
     "\n",
     "d = 3\n",
     "n = 3\n",
     "N = 6\n",
     "L::T = 12\n",
+    "ϕ::T = 0.0\n",
     "mu::T = 0.5\n",
     "n_imag = 1\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, mu, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, mu, n_imag)\n",
     "@time evals, evecs, info = eig(H, 5)\n",
     "print(info.numops, \" operations : \")\n",
     "println(evals)"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "using Plots\n",
+    "\n",
+    "V_gauss(r2) =\n",
+    "    -4 * exp(-r2 / 4)\n",
+    "\n",
+    "d = 3\n",
+    "n = 2\n",
+    "N = 32\n",
+    "L::T = 16\n",
+    "ϕ::T = 0.5\n",
+    "mu::T = 0.5\n",
+    "n_imag = 0\n",
+    "\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, mu, n_imag)\n",
+    "@time evals, evecs, info = eig(H, 20)\n",
+    "print(info.numops, \" operations : \")\n",
+    "print(evals)\n",
+    "\n",
+    "scatter(real.(evals), imag.(evals); legend=false)\n",
+    "xlabel!(\"Re E\")\n",
+    "ylabel!(\"Im E\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {

From c30c47ad9c04d951819113e39015c27f247c7842 Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Tue, 4 Apr 2023 17:41:43 -0400
Subject: [PATCH 2/8] Cosmetic: use the Greek-letter name for mu in example.ipynb

---
 example.ipynb | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/example.ipynb b/example.ipynb
index 374ba65..c52e018 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -25,10 +25,10 @@
     "N = 6\n",
     "L::T = 12\n",
     "ϕ::T = 0.0\n",
-    "mu::T = 0.5\n",
+    "μ::T = 0.5\n",
     "n_imag = 1\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, mu, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
     "@time evals, evecs, info = eig(H, 5)\n",
     "print(info.numops, \" operations : \")\n",
     "println(evals)"
@@ -50,10 +50,10 @@
     "N = 32\n",
     "L::T = 16\n",
     "ϕ::T = 0.5\n",
-    "mu::T = 0.5\n",
+    "μ::T = 0.5\n",
     "n_imag = 0\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, mu, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
     "@time evals, evecs, info = eig(H, 20)\n",
     "print(info.numops, \" operations : \")\n",
     "print(evals)\n",

From af89ace4ecb75c09fba632596b9e7a2caf726293 Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Thu, 6 Apr 2023 23:53:36 -0400
Subject: [PATCH 3/8] Bug fix: adapt benchmark.jl to the phi argument and drop V_test type annotations

---
 benchmark.jl | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/benchmark.jl b/benchmark.jl
index 12c1b9f..5ed311d 100644
--- a/benchmark.jl
+++ b/benchmark.jl
@@ -15,7 +15,7 @@ end
 
 T=Float32
 
-function V_test(r2::T)::T
+function V_test(r2)
     return -4*exp(-r2/4)
 end
 
@@ -31,7 +31,7 @@ n_image=1
 
 for L::T in 5.0:14.0
     println("Constructing H operator...")
-    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,μ),n_image)
+    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image)
     println("Applying H 1000 times...")
     if GPU_mode
         v=CUDA.rand(Complex{T},vectorDims(H)...)

From 6bbf238f4aa20f7fa747b5a3d2dcd9792165fbf2 Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Thu, 6 Apr 2023 23:54:23 -0400
Subject: [PATCH 4/8] Combine CPU.jl and GPU.jl

---
 CPU.jl                 | 73 -------------------------------------
 GPU.jl => HOperator.jl | 82 ++++++++++++++++++++++++++++++++++--------
 benchmark.jl           |  8 ++---
 example.ipynb          |  9 ++---
 4 files changed, 76 insertions(+), 96 deletions(-)
 delete mode 100644 CPU.jl
 rename GPU.jl => HOperator.jl (54%)

diff --git a/CPU.jl b/CPU.jl
deleted file mode 100644
index 254e8b5..0000000
--- a/CPU.jl
+++ /dev/null
@@ -1,73 +0,0 @@
-include("common.jl")
-using TensorOperations, KrylovKit, LinearAlgebra
-
-"A Hamiltonian that can be applied to a vector"
-struct HOperator{T}
-    d::Int
-    n::Int
-    N::Int
-    L::T
-    μ::T
-    ∂1::Matrix{Complex{T}}
-    Vs::Array{Complex{T}}
-    hermitian::Bool
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
-        k = -N÷2:N÷2-1
-        ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k')
-        Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
-        return new{T}(d, n, N, L, μ, ∂1, Vs, ϕ == 0.0)
-    end
-end
-
-Base.size(H::HOperator, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("HOperator only has 2 dimesions"))
-Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
-
-"Dimensions of a vector to which H can be applied"
-vectorDims(H::HOperator)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)
-
-"Apply H on v and store the result in out"
-function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
-    #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
-    # apply V operator
-    @. out = H.Vs * v
-    # apply K opereator
-    coeff = -1 / (2 * H.μ)
-    coords = H.n - 1
-    nconList_v_template = -collect(1:H.d*(coords))
-    for dim = 1:H.d
-        for coord1 = 1:coords
-            for coord2 = 1:coord1
-                i1 = which_index(H.n, dim, coord1)
-                i2 = which_index(H.n, dim, coord2)
-                nconList_1 = [-i1, 1]
-                nconList_2 = [-i2, 2]
-                nconList_v = copy(nconList_v_template)
-                if i1 == i2
-                    nconList_2[1] = 1
-                else
-                    nconList_v[i1] = 1
-                end
-                nconList_v[i2] = 2
-                v_new = @ncon((H.∂1, H.∂1, v), (nconList_1, nconList_2, nconList_v))
-                out = axpy!(coeff, v_new, out)
-            end
-        end
-    end
-    return out
-end
-
-"Apply H on v and return the result"
-function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
-    out = similar(v)
-    return mul!(out, H, v)
-end
-
-tolerance = 1e-6
-
-"Wrapper for KrylovKit.eigsolve"
-function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
-    x₀ = rand(Complex{T}, vectorDims(H))
-    evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
-    resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
-    return evals, evecs, info
-end
diff --git a/GPU.jl b/HOperator.jl
similarity index 54%
rename from GPU.jl
rename to HOperator.jl
index 7363265..07efdff 100644
--- a/GPU.jl
+++ b/HOperator.jl
@@ -1,33 +1,75 @@
 include("common.jl")
-using KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
+using TensorOperations, KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
 
-@assert CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
+@enum HOperator_backend cpu_tensor gpu_cutensor
 
 "A Hamiltonian that can be applied to a vector"
 struct HOperator{T}
     d::Int
     n::Int
     N::Int
-    K_diag::CuTensor{Complex{T}}
-    K_mixed::CuTensor{Complex{T}}
-    Vs::CuArray{Complex{T}}
+    L::T # presumably the box length: calculate_Vs scales index distances by L / N — confirm
+    μ::T # enters the K term of the CPU mul! as the prefactor -1 / (2μ)
+    ∂1 # Matrix{Complex{T}} (cpu_tensor backend) or Nothing (gpu_cutensor backend)
+    K_diag # CuTensor{Complex{T}} (gpu_cutensor backend) or Nothing (cpu_tensor backend)
+    K_mixed # CuTensor{Complex{T}} or Nothing
+    Vs # Array{Complex{T}} or CuArray{Complex{T}}
     hermitian::Bool
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int) where {T<:Float}
+    mode::HOperator_backend
+    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::HOperator_backend) where {T<:Float}
+        @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
         k = -N÷2:N÷2-1
-        K_partial = (exp(-im * ϕ) * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k')
-        K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A'])
-        K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
         Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
-        return new{T}(d, n, N, K_diag, K_mixed, Vs, ϕ == 0.0)
+        hermitian = ϕ == 0.0 # flag is true only when ϕ is exactly zero; eig passes it to eigsolve as ishermitian
+        if mode == cpu_tensor # CPU backend: store the derivative matrix, leave the GPU fields as nothing
+            ∂1 = exp(-im * ϕ) .* ∂_1DOF.(L, N, k, k') # ∂_1DOF broadcast over every (k, k') pair, scaled by exp(-iϕ)
+            return new{T}(d, n, N, L, μ, ∂1, nothing, nothing, Vs, hermitian, mode) # field order: ∂1, K_diag, K_mixed, Vs
+        elseif mode == gpu_cutensor # GPU backend: precompute the kinetic tensors as CuTensors, leave ∂1 as nothing
+            K_partial = (exp(-im * ϕ) * im / sqrt(2 * μ)) .* ∂_1DOF.(L, N, k, k') # same matrix with the factor i / sqrt(2μ) folded in
+            K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A']) # matrix square of K_partial, index labels 'a' and 'A'
+            K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B']) # CuTensor product; presumably an outer product since no label is shared — confirm
+            return new{T}(d, n, N, L, μ, nothing, K_diag, K_mixed, CuArray(Vs), hermitian, mode) # Vs is converted to a CuArray for this backend
+        end
     end
 end
 
 Base.size(H::HOperator, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("HOperator only has 2 dimesions"))
 Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
 
-"Dimensions of a vector to which H can be applied"
+"Dimensions of a vector to which 'H' can be applied"
 vectorDims(H::HOperator)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)
 
+"Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend"
+function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
+    #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
+    # apply V operator
+    @. out = H.Vs * v
+    # apply K operator: every contraction term below is accumulated into out
+    coeff = -1 / (2 * H.μ) # common prefactor of every contraction term
+    coords = H.n - 1 # n - 1 coordinates per dimension (presumably relative coordinates — confirm)
+    nconList_v_template = -collect(1:H.d*(coords)) # ncon labels for v: all negative, i.e. every index open by default
+    for dim = 1:H.d
+        for coord1 = 1:coords
+            for coord2 = 1:coord1 # coord2 <= coord1: each pair is visited once, including coord1 == coord2
+                i1 = which_index(H.n, dim, coord1) # position of (dim, coord1) among the indices of v, per which_index
+                i2 = which_index(H.n, dim, coord2) # position of (dim, coord2) among the indices of v
+                nconList_1 = [-i1, 1] # first ∂1: open output index i1, contracted label 1
+                nconList_2 = [-i2, 2] # second ∂1: open output index i2, contracted label 2
+                nconList_v = copy(nconList_v_template) # fresh label list so the template is never mutated
+                if i1 == i2
+                    nconList_2[1] = 1 # same index twice: chain the matrices, second ∂1 becomes [1, 2]
+                else
+                    nconList_v[i1] = 1 # distinct indices: first ∂1 contracts directly with index i1 of v
+                end
+                nconList_v[i2] = 2 # label 2 always contracts with index i2 of v
+                v_new = @ncon((H.∂1, H.∂1, v), (nconList_1, nconList_2, nconList_v)) # allocates the contracted tensor
+                out = axpy!(coeff, v_new, out) # in place: out += coeff * v_new
+            end
+        end
+    end
+    return out
+end
+
 "cuTENSOR contraction and accumulation (C = A * B + C)"
 function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
     CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY,
@@ -35,7 +77,7 @@ function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
     return C
 end
 
-"Apply H on v and store the result in out"
+"Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend"
 function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
     #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
     ctx = context()
@@ -80,7 +122,13 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArra
     return out_t.data
 end
 
-"Apply H on v and return the result"
+"Apply 'H' on 'v' and return the result using the 'cpu_tensor' backend"
+function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
+    out = similar(v)
+    return mul!(out, H, v)
+end
+
+"Apply 'H' on 'v' and return the result using the 'gpu_cutensor' backend"
 function (H::HOperator{T})(v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
     out = similar(v)
     return mul!(out, H, v)
@@ -90,8 +138,12 @@ tolerance = 1e-6
 
 "Wrapper for KrylovKit.eigsolve"
 function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
-    x₀ = CUDA.rand(Complex{T}, vectorDims(H)...) # ... added
-    synchronize()
+    if H.mode == cpu_tensor
+        x₀ = rand(Complex{T}, vectorDims(H)...)
+    elseif H.mode == gpu_cutensor
+        x₀ = CUDA.rand(Complex{T}, vectorDims(H)...)
+        synchronize()
+    end
     evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
     resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
     return evals, evecs, info
diff --git a/benchmark.jl b/benchmark.jl
index 5ed311d..63457e4 100644
--- a/benchmark.jl
+++ b/benchmark.jl
@@ -1,16 +1,16 @@
-using CUDA
+include("HOperator.jl")
 
 GPU_mode = !("CPU" in ARGS) && CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu()
 
 println("Running with ",Threads.nthreads()," thread(s)")
 
 if GPU_mode
-    include("GPU.jl")
+    mode=gpu_cutensor
     println("Available GPUs:")
     print("    ")
     println.(name.(devices()))
 else
-    include("CPU.jl")
+    mode=cpu_tensor
 end
 
 T=Float32
@@ -31,7 +31,7 @@ n_image=1
 
 for L::T in 5.0:14.0
     println("Constructing H operator...")
-    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image)
+    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image,mode)
     println("Applying H 1000 times...")
     if GPU_mode
         v=CUDA.rand(Complex{T},vectorDims(H)...)
diff --git a/example.ipynb b/example.ipynb
index c52e018..a8caef4 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -6,8 +6,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor\n",
-    "include(\"CPU.jl\") # using CPU mode\n",
+    "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
+    "include(\"HOperator.jl\")\n",
+    "mode = cpu_tensor # using CPU mode\n",
     "T = Float32 # single-precision mode"
    ]
   },
@@ -28,7 +29,7 @@
     "μ::T = 0.5\n",
     "n_imag = 1\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
     "@time evals, evecs, info = eig(H, 5)\n",
     "print(info.numops, \" operations : \")\n",
     "println(evals)"
@@ -53,7 +54,7 @@
     "μ::T = 0.5\n",
     "n_imag = 0\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag)\n",
+    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
     "@time evals, evecs, info = eig(H, 20)\n",
     "print(info.numops, \" operations : \")\n",
     "print(evals)\n",

From 1f67dea4c98bd5ce0057b0cfcdd24e3f6b7b9b21 Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Fri, 7 Apr 2023 00:00:22 -0400
Subject: [PATCH 5/8] Simplification of overloaded function

---
 HOperator.jl  | 10 ++--------
 example.ipynb | 33 ++++++++++++++++++++++++++++-----
 2 files changed, 30 insertions(+), 13 deletions(-)

diff --git a/HOperator.jl b/HOperator.jl
index 07efdff..10b0259 100644
--- a/HOperator.jl
+++ b/HOperator.jl
@@ -122,14 +122,8 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArra
     return out_t.data
 end
 
-"Apply 'H' on 'v' and return the result using the 'cpu_tensor' backend"
-function (H::HOperator{T})(v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
-    out = similar(v)
-    return mul!(out, H, v)
-end
-
-"Apply 'H' on 'v' and return the result using the 'gpu_cutensor' backend"
-function (H::HOperator{T})(v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
+"Apply 'H' on 'v' and return the result"
+function (H::HOperator)(v)
     out = similar(v)
     return mul!(out, H, v)
 end
diff --git a/example.ipynb b/example.ipynb
index a8caef4..8b5103f 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -2,9 +2,23 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "LoadError",
+     "evalue": "LoadError: invalid redefinition of constant cpu_tensor\nin expression starting at c:\\Users\\yapan\\DVR.jl\\HOperator.jl:4",
+     "output_type": "error",
+     "traceback": [
+      "LoadError: invalid redefinition of constant cpu_tensor\n",
+      "in expression starting at c:\\Users\\yapan\\DVR.jl\\HOperator.jl:4\n",
+      "\n",
+      "Stacktrace:\n",
+      " [1] top-level scope\n",
+      "   @ Enums.jl:204"
+     ]
+    }
+   ],
    "source": [
     "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
     "include(\"HOperator.jl\")\n",
@@ -14,9 +28,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "  4.377701 seconds (5.09 M allocations: 764.743 MiB, 6.09% gc time, 71.04% compilation time: 99% of which was recompilation)\n",
+      "114 operations : ComplexF32[-7.6208663f0 + 0.0f0im, -3.551723f0 + 0.0f0im, -3.5371912f0 + 0.0f0im, -3.5240355f0 + 0.0f0im, -3.5159583f0 + 0.0f0im, -3.4865863f0 + 0.0f0im, -3.1896422f0 + 0.0f0im, -2.9661055f0 + 0.0f0im]\n"
+     ]
+    }
+   ],
    "source": [
     "V_gauss(r2) =\n",
     "    -4 * exp(-r2 / 4)\n",
@@ -37,7 +60,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 7,
    "metadata": {},
    "outputs": [],
    "source": [

From 03605c060aa2d21c4aa8a023b99f20565bb494b4 Mon Sep 17 00:00:00 2001
From: ysyapa <ysyapa@ncsu.edu>
Date: Fri, 7 Apr 2023 00:11:40 -0400
Subject: [PATCH 6/8] Clear outputs

---
 example.ipynb | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)

diff --git a/example.ipynb b/example.ipynb
index 8b5103f..a8caef4 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -2,23 +2,9 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "ename": "LoadError",
-     "evalue": "LoadError: invalid redefinition of constant cpu_tensor\nin expression starting at c:\\Users\\yapan\\DVR.jl\\HOperator.jl:4",
-     "output_type": "error",
-     "traceback": [
-      "LoadError: invalid redefinition of constant cpu_tensor\n",
-      "in expression starting at c:\\Users\\yapan\\DVR.jl\\HOperator.jl:4\n",
-      "\n",
-      "Stacktrace:\n",
-      " [1] top-level scope\n",
-      "   @ Enums.jl:204"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
     "include(\"HOperator.jl\")\n",
@@ -28,18 +14,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "  4.377701 seconds (5.09 M allocations: 764.743 MiB, 6.09% gc time, 71.04% compilation time: 99% of which was recompilation)\n",
-      "114 operations : ComplexF32[-7.6208663f0 + 0.0f0im, -3.551723f0 + 0.0f0im, -3.5371912f0 + 0.0f0im, -3.5240355f0 + 0.0f0im, -3.5159583f0 + 0.0f0im, -3.4865863f0 + 0.0f0im, -3.1896422f0 + 0.0f0im, -2.9661055f0 + 0.0f0im]\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "V_gauss(r2) =\n",
     "    -4 * exp(-r2 / 4)\n",
@@ -60,7 +37,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [

From a8a7bdb44f8eb51ff149b309b7195748cacf8a3f Mon Sep 17 00:00:00 2001
From: yapanuwan <yapanuwan@gmail.com>
Date: Fri, 7 Apr 2023 20:02:34 -0400
Subject: [PATCH 7/8] Rename HOperator to Hamiltonian

---
 HOperator.jl => Hamiltonian.jl | 22 +++++++++++-----------
 benchmark.jl                   |  4 ++--
 example.ipynb                  |  6 +++---
 3 files changed, 16 insertions(+), 16 deletions(-)
 rename HOperator.jl => Hamiltonian.jl (84%)

diff --git a/HOperator.jl b/Hamiltonian.jl
similarity index 84%
rename from HOperator.jl
rename to Hamiltonian.jl
index 10b0259..ce0542f 100644
--- a/HOperator.jl
+++ b/Hamiltonian.jl
@@ -1,10 +1,10 @@
 include("common.jl")
 using TensorOperations, KrylovKit, LinearAlgebra, CUDA, CUDA.CUTENSOR
 
-@enum HOperator_backend cpu_tensor gpu_cutensor
+@enum Hamiltonian_backend cpu_tensor gpu_cutensor
 
 "A Hamiltonian that can be applied to a vector"
-struct HOperator{T}
+struct Hamiltonian{T}
     d::Int
     n::Int
     N::Int
@@ -15,8 +15,8 @@ struct HOperator{T}
     K_mixed # CuTensor{Complex{T}} or Nothing
     Vs # Array{Complex{T}} or CuArray{Complex{T}}
     hermitian::Bool
-    mode::HOperator_backend
-    function HOperator{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::HOperator_backend) where {T<:Float}
+    mode::Hamiltonian_backend
+    function Hamiltonian{T}(V_twobody::Function, d::Int, n::Int, N::Int, L::T, ϕ::T, μ::T, n_image::Int, mode::Hamiltonian_backend) where {T<:Float}
         @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
         k = -N÷2:N÷2-1
         Vs = calculate_Vs(V_twobody, d, n, N, L, ϕ, n_image)
@@ -33,14 +33,14 @@ struct HOperator{T}
     end
 end
 
-Base.size(H::HOperator, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("HOperator only has 2 dimesions"))
-Base.size(H::HOperator)::Dims{2} = (size(H, 1), size(H, 2))
+Base.size(H::Hamiltonian, i::Int)::Int = (i == 1 || i == 2) ? H.N^(H.d * (H.n - 1)) : throw(ArgumentError("Hamiltonian only has 2 dimensions")) # both axes have length N^(d*(n-1)); any other axis is an error
+Base.size(H::Hamiltonian)::Dims{2} = (size(H, 1), size(H, 2))
 
 "Dimensions of a vector to which 'H' can be applied"
-vectorDims(H::HOperator)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)
+vectorDims(H::Hamiltonian)::Dims = tuple(fill(H.N, H.d * (H.n - 1))...)
 
 "Apply 'H' on 'v' and store the result in 'out' using the 'cpu_tensor' backend"
-function LinearAlgebra.mul!(out::Array{Complex{T}}, H::HOperator{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
+function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{Complex{T}})::Array{Complex{T}} where {T<:Float}
     #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
     # apply V operator
     @. out = H.Vs * v
@@ -78,7 +78,7 @@ function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
 end
 
 "Apply 'H' on 'v' and store the result in 'out' using the 'gpu_cutensor' backend"
-function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
+function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuArray{Complex{T}})::CuArray{Complex{T}} where {T<:Float}
     #LinearMaps.check_dim_mul(out,H,v) --- dimensions don't match
     ctx = context()
     # apply V operator
@@ -123,7 +123,7 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::HOperator{T}, v::CuArra
 end
 
 "Apply 'H' on 'v' and return the result"
-function (H::HOperator)(v)
+function (H::Hamiltonian)(v)
     out = similar(v)
     return mul!(out, H, v)
 end
@@ -131,7 +131,7 @@ end
 tolerance = 1e-6
 
 "Wrapper for KrylovKit.eigsolve"
-function eig(H::HOperator{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
+function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
     if H.mode == cpu_tensor
         x₀ = rand(Complex{T}, vectorDims(H)...)
     elseif H.mode == gpu_cutensor
diff --git a/benchmark.jl b/benchmark.jl
index 63457e4..12324de 100644
--- a/benchmark.jl
+++ b/benchmark.jl
@@ -1,4 +1,4 @@
-include("HOperator.jl")
+include("Hamiltonian.jl")
 
 GPU_mode = !("CPU" in ARGS) && CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu()
 
@@ -31,7 +31,7 @@ n_image=1
 
 for L::T in 5.0:14.0
     println("Constructing H operator...")
-    @time H=HOperator{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image,mode)
+    @time H=Hamiltonian{T}(V_test,3,3,N,L,convert(T,0),convert(T,μ),n_image,mode)
     println("Applying H 1000 times...")
     if GPU_mode
         v=CUDA.rand(Complex{T},vectorDims(H)...)
diff --git a/example.ipynb b/example.ipynb
index a8caef4..b92409e 100644
--- a/example.ipynb
+++ b/example.ipynb
@@ -7,7 +7,7 @@
    "outputs": [],
    "source": [
     "# prerequisite packages: KrylovKit, TensorOperations, LinearAlgebra, CUDA#tb/cutensor, Plots\n",
-    "include(\"HOperator.jl\")\n",
+    "include(\"Hamiltonian.jl\")\n",
     "mode = cpu_tensor # using CPU mode\n",
     "T = Float32 # single-precision mode"
    ]
@@ -29,7 +29,7 @@
     "μ::T = 0.5\n",
     "n_imag = 1\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
+    "H = Hamiltonian{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
     "@time evals, evecs, info = eig(H, 5)\n",
     "print(info.numops, \" operations : \")\n",
     "println(evals)"
@@ -54,7 +54,7 @@
     "μ::T = 0.5\n",
     "n_imag = 0\n",
     "\n",
-    "H = HOperator{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
+    "H = Hamiltonian{T}(V_gauss, d, n, N, L, ϕ, μ, n_imag, mode)\n",
     "@time evals, evecs, info = eig(H, 20)\n",
     "print(info.numops, \" operations : \")\n",
     "print(evals)\n",

From 65775cf9bebcb73cb1fbbca1c3202332d54389b3 Mon Sep 17 00:00:00 2001
From: Nuwan Yapa <ysyapa@ncsu.edu>
Date: Fri, 7 Apr 2023 22:27:40 -0400
Subject: [PATCH 8/8] Tidy up eig() return values

---
 Hamiltonian.jl | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Hamiltonian.jl b/Hamiltonian.jl
index ce0542f..c99b3ea 100644
--- a/Hamiltonian.jl
+++ b/Hamiltonian.jl
@@ -131,7 +131,7 @@ end
 tolerance = 1e-6
 
 "Wrapper for KrylovKit.eigsolve"
-function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector{Complex{T}},Any,Any} where {T<:Float}
+function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{Vector,Vector,KrylovKit.ConvergenceInfo} where {T<:Float}
     if H.mode == cpu_tensor
         x₀ = rand(Complex{T}, vectorDims(H)...)
     elseif H.mode == gpu_cutensor
@@ -140,5 +140,6 @@ function eig(H::Hamiltonian{T}, levels::Int; resonances = !H.hermitian)::Tuple{V
     end
     evals, evecs, info = eigsolve(H, x₀, levels, resonances ? :LI : :SR; ishermitian = H.hermitian, tol = tolerance)
     resonances || info.converged < levels && throw(error("Not enough convergence")) # don't check convergence for resonances
+    if H.hermitian evals = real.(evals) end
     return evals, evecs, info
 end