diff --git a/Hamiltonian.jl b/Hamiltonian.jl
index a206677..c23942b 100644
--- a/Hamiltonian.jl
+++ b/Hamiltonian.jl
@@ -6,27 +6,23 @@ using TensorOperations, KrylovKit, LinearAlgebra, CUDA, cuTENSOR, NVTX
 "A Hamiltonian that can be applied to a vector"
 struct Hamiltonian{T}
     s::system{T}
-    K_partial::Matrix{Complex{T}}
-    K_diag::Union{CuTensor{Complex{T}},Nothing}
-    K_mixed::Union{CuTensor{Complex{T}},Nothing}
-    Vs::Union{Array{Complex{T}},CuArray{Complex{T}}}
+    K::Union{CuTensor{Complex{T}}, Matrix{Complex{T}}}    # kinetic matrix for one degree of freedom: CuTensor in gpu_cutensor mode, host Matrix otherwise
+    Vs::Union{Array{Complex{T}}, CuArray{Complex{T}}}    # potential from calculate_Vs; moved to a CuArray in gpu_cutensor mode
     hermitian::Bool
     mode::Hamiltonian_backend
 
     function Hamiltonian{T}(s::system{T}, V_twobody::Function, ϕ::Real, n_image::Int, mode::Hamiltonian_backend) where {T<:Float}
         @assert mode != gpu_cutensor || CUDA.functional() && CUDA.has_cuda() && CUDA.has_cuda_gpu() "CUDA not available"
-        k = -s.N÷2:s.N÷2-1
-        Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image)
         hermitian = ϕ == 0.0
-        K_partial = (exp(-im * convert(T, ϕ)) * im / sqrt(2 * s.μ)) .* ∂_1DOF.(Ref(s), k, k')
-        K_diag = nothing
-        K_mixed = nothing
+        Vs = calculate_Vs(s, V_twobody, convert(T, ϕ), n_image)
+        k = -s.N÷2:s.N÷2-1    # lattice indices shared by rows and columns of ∂
+        ∂ = ∂_1DOF.(Ref(s), k, k')    # square matrix of ∂_1DOF over all (k, l) pairs
+        K = exp(-2im * convert(T, ϕ)) .* (∂ * ∂)    # TODO: Calculate K matrix elements directly
         if mode == gpu_cutensor
-            K_diag = CuTensor(CuArray(K_partial * K_partial), ['a', 'A'])
-            K_mixed = CuTensor(CuArray(K_partial), ['a', 'A']) * CuTensor(CuArray(K_partial), ['b', 'B'])
+            K = CuTensor(CuArray(K), ['a', 'A'])    # upload first: CuTensor wraps a device CuArray, not a host Matrix
             Vs = CuArray(Vs)
         end
-        return new{T}(s, K_partial, K_diag, K_mixed, Vs, hermitian, mode)
+        return new{T}(s, K, Vs, hermitian, mode)    # the -1 / (2μ) prefactor is no longer folded into K; mul! applies it per coordinate
     end
 end
 
@@ -45,31 +41,23 @@ function LinearAlgebra.mul!(out::Array{Complex{T}}, H::Hamiltonian{T}, v::Array{
     coords = H.s.n - 1
     nconList_v_template = -collect(1:H.s.d*(coords))
     for dim = 1:H.s.d
-        for coord1 = 1:coords
-            for coord2 = 1:coord1
-                i1 = which_index(H.s, dim, coord1)
-                i2 = which_index(H.s, dim, coord2)
-                nconList_1 = [-i1, 1]
-                nconList_2 = [-i2, 2]
-                nconList_v = copy(nconList_v_template)
-                if i1 == i2
-                    nconList_2[1] = 1
-                else
-                    nconList_v[i1] = 1
-                end
-                nconList_v[i2] = 2
-                v_new = @ncon((H.K_partial, H.K_partial, v), (nconList_1, nconList_2, nconList_v))
-                out = axpy!(1, v_new, out)
-            end
+        for coord = 1:coords
+            i = which_index(H.s, dim, coord)
+            nconList_K = [-i, 1]
+            nconList_v = copy(nconList_v_template)
+            nconList_v[i] = 1
+            v_new = @ncon((H.K, v), (nconList_K, nconList_v))
+            coeff = -1 / (2 * H.s.μs[coord])
+            out = axpy!(coeff, v_new, out)
         end
     end
     return out
 end
 
 "cuTENSOR contraction and accumulation (C = A * B + C)"
-function contract_accumulate!(C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor
-    CUTENSOR.contraction!(one(eltype(C)), A.data, A.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, CUTENSOR.CUTENSOR_OP_IDENTITY,
-                          one(eltype(C)), C.data, C.inds, CUTENSOR.CUTENSOR_OP_IDENTITY, CUTENSOR.CUTENSOR_OP_IDENTITY)
+function contract_accumulate!(alpha::Number, C::CuTensor, A::CuTensor, B::CuTensor)::CuTensor    # in place: C = alpha * A * B + C
+    cuTENSOR.contraction!(alpha, A.data, A.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, B.data, B.inds, cuTENSOR.CUTENSOR_OP_IDENTITY,
+                          one(eltype(C)), C.data, C.inds, cuTENSOR.CUTENSOR_OP_IDENTITY, cuTENSOR.CUTENSOR_OP_IDENTITY)    # unit weight on C so the product is accumulated into it
     return C
 end
 
@@ -86,31 +74,16 @@ function LinearAlgebra.mul!(out::CuArray{Complex{T}}, H::Hamiltonian{T}, v::CuAr
     v_t = CuTensor(v, copy(inds_template))
     out_t = CuTensor(out, copy(inds_template))
     for dim = 1:H.s.d
-        for coord1 = 1:coords
-            for coord2 = 1:coord1
-                i1 = which_index(H.s, dim, coord1)
-                i2 = which_index(H.s, dim, coord2)
-                @assert v_t.inds == inds_template "v indices permuted"
-                if i1 == i2
-                    @assert H.K_diag.inds[2] == 'A' "K_diag indices permuted"
-                    H.K_diag.inds[1] = 'a' - 1 + i1
-                    v_t.inds[i1] = 'A'
-                    #synchronize(ctx)
-                    NVTX.@range "K-diag" out_t = contract_accumulate!(out_t, H.K_diag, v_t)
-                    v_t.inds[i1] = 'a' - 1 + i1
-                else
-                    @assert H.K_mixed.inds[2] == 'A' && H.K_mixed.inds[4] == 'B' "K_mixed indices permuted"
-                    H.K_mixed.inds[1] = 'a' - 1 + i1
-                    H.K_mixed.inds[3] = 'a' - 1 + i2
-                    # OPTIMIZE: A and B can be swapped
-                    v_t.inds[i1] = 'A'
-                    v_t.inds[i2] = 'B'
-                    #synchronize(ctx)
-                    NVTX.@range "K-mixed" out_t = contract_accumulate!(out_t, H.K_mixed, v_t)
-                    v_t.inds[i1] = 'a' - 1 + i1
-                    v_t.inds[i2] = 'a' - 1 + i2
-                end
-            end
+        for coord = 1:coords    # one contraction per (dim, coord) pair; no cross terms between coordinates
+            i = which_index(H.s, dim, coord)    # array dimension of v that K acts on
+            @assert v_t.inds == inds_template "v indices permuted"
+            @assert H.K.inds[2] == 'A' "K indices permuted"    # H.K is reused every iteration; only inds[1] is rewritten below
+            H.K.inds[1] = 'a' - 1 + i    # output label of K: the i-th letter, matching out_t's label for dimension i
+            v_t.inds[i] = 'A'    # temporarily relabel dimension i of v so it contracts with K's second index
+            #synchronize(ctx)
+            coeff = -1 / (2 * H.s.μs[coord])    # kinetic prefactor for this coordinate
+            NVTX.@range "K" out_t = contract_accumulate!(coeff, out_t, H.K, v_t)
+            v_t.inds[i] = 'a' - 1 + i    # restore v's label so the assertion holds on the next iteration
         end
     end
     @assert out_t.inds == inds_template "out indices permuted"
diff --git a/common.jl b/common.jl
index aae050d..36f3d22 100644
--- a/common.jl
+++ b/common.jl
@@ -1,18 +1,33 @@
 Float = Union{Float32,Float64}
 
+norm_square(x) = sum(x .* x)    # sum of element-wise squares (no complex conjugation)
+reducedMass(m1::Float, m2::Float) = 1 / (1/m1 + 1/m2)    # reduced mass of two masses, equal to m1 * m2 / (m1 + m2)
+
 "A few-body system defined by its physical parameters"
 struct system{T}
     d::Int
     n::Int
     N::Int
     L::T
-    μ::T
+    μs::Vector{T}    # n entries, μs[j] = j / (j + 1); presumably reduced masses of the relative coordinates for equal unit masses — confirm
+    invU::Matrix{T}    # first n - 1 columns of inv(U), size n × (n - 1)
 
-    system{T}(d::Int, n::Int, N::Int, L::Real, μ::Real=0.5) where {T<:Float} = new{T}(d, n, N, convert(T, L), convert(T, μ))
+    function system{T}(d::Int, n::Int, N::Int, L::Real) where {T<:Float}
+        μs = collect(1:n) ./ collect(2:(n + 1))    # element-wise j / (j + 1) for j = 1:n, computed in Float64
+        # Build the n × n matrix U row by row: row `coord` holds 1 / coord in its first `coord` columns and -1 in column coord + 1 (no -1 in the last row).
+        U = zeros(T, n, n)
+        for coord in 1:n
+            U[coord, 1:coord] .= 1 / coord
+            if coord != n
+                U[coord, coord + 1] = -1
+            end
+        end
+        invU = inv(U)[:, 1:(n - 1)]    # drop the last column of inv(U), the one paired with U's all-positive last row
+        # new{T} converts its arguments to the field types, so the Float64 μs is stored as Vector{T}.
+        return new{T}(d, n, N, convert(T, L), μs, invU)
+    end
 end
 
-norm_square(x::Array{Int})::Int = sum(x .* x)
-
 "Eq (46): Partial derivative matrix element for 1 degree of freedom"
 function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float}
     if k == l
@@ -22,19 +37,17 @@ function ∂_1DOF(s::system{T}, k::Int, l::Int)::Complex{T} where {T<:Float}
     end
 end
 
-"Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' and particle 'p'?"
-which_index(s::system, dim::Int, p::Int)::Int = (dim - 1) * (s.n - 1) + p
+"Which index (dimension of the multidimensional array) corresponds to spatial dimension 'dim' of coordinate 'coord'?"
+which_index(s::system, dim::Int, coord::Int)::Int = (dim - 1) * (s.n - 1) + coord    # the s.n - 1 coordinates of one spatial dimension are consecutive; coord varies fastest
 
-"Δk (distance in terms of lattice paramter) between two particles along the given dimension"
-function get_Δk(s::system, i::CartesianIndex, dim::Int, p1::Int, p2::Int)::Int
-    if p1 == p2
-        return 0
-    elseif p1 == s.n
-        return -(i[which_index(s, dim, p2)] - s.N ÷ 2 - 1)
-    elseif p2 == s.n
-        return i[which_index(s, dim, p1)] - s.N ÷ 2 - 1
+"Distance to the nearest image: shift Δk by one period s.N when it lies outside [-s.N ÷ 2, s.N ÷ 2]"
+function nearest(s, Δk)    # s only needs a field N; Δk may be an integer or a float
+    if Δk > s.N ÷ 2    # more than half a period to the right: step one period back
+        return Δk - s.N
+    elseif Δk < -s.N ÷ 2    # more than half a period to the left: step one period forward
+        return Δk + s.N
     else
-        return i[which_index(s, dim, p1)] - i[which_index(s, dim, p2)]
+        return Δk    # already within half a period; both endpoints ±(s.N ÷ 2) are left as they are
     end
 end
 
@@ -44,21 +57,16 @@ function calculate_Vs(s::system{T}, V_twobody::Function, ϕ::T, n_image::Int)::A
     images = collect.(Iterators.product(fill(-n_image:n_image, s.d)...)) # TODO: Learn how to use tuples instead of vectors
     Vs = zeros(Complex{T}, fill(s.N, s.d * (s.n - 1))...)
     Threads.@threads for i in CartesianIndices(Vs)
+        xs = reshape(collect(Tuple(i)), s.n - 1, s.d) .- (s.N ÷ 2 + 1)    # array index 1:N -> lattice coordinate -N÷2:N÷2-1, laid out as (coord, dim)
+        rs = s.invU * xs    # n × d matrix, one row per particle p, read below as rs[p, dim]
         for p1 in 1:s.n
-            for p2 in (p1 + 1):s.n
-                min_Δk = Array{Int}(undef, s.d)
+            for p2 in 1:(p1 - 1)
+                Δk = Array{T}(undef, s.d)
                 for dim in 1:s.d
-                    Δk = get_Δk(s, i, dim, p1, p2)
-                    if Δk > s.N ÷ 2
-                        min_Δk[dim] = Δk - s.N
-                    elseif Δk < -s.N ÷ 2
-                        min_Δk[dim] = Δk + s.N
-                    else
-                        min_Δk[dim] = Δk
-                    end
+                    Δk[dim] = nearest(s, rs[p1, dim] - rs[p2, dim])
                 end
                 for image in images
-                    Δk² = norm_square(min_Δk .- (s.N .* image))
+                    Δk² = norm_square(Δk .- (s.N .* image))
                     Vs[i] += V_twobody(Δk² * coeff²)
                 end
             end