Commit e2a5726

[NDTensors] [ITensors] [ITensorGPU] Bump NDTensors v0.1.46, ITensors v0.3.26, CUDA v4.0, ITensorGPU v0.1.1 (#1053)
mtfishman authored Feb 7, 2023
1 parent 3f43e24 commit e2a5726
Showing 6 changed files with 77 additions and 74 deletions.
11 changes: 7 additions & 4 deletions ITensorGPU/Project.toml
@@ -1,7 +1,7 @@
 name = "ITensorGPU"
 uuid = "d89171c1-af8f-46b3-badf-d2a472317c15"
 authors = ["Katharine Hyatt", "Matthew Fishman <mfishman@flatironinstitute.org>"]
-version = "0.1.0"
+version = "0.1.1"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -11,23 +11,26 @@
 Functors = "d9f16b24-f501-4c13-a1f2-28368ffc5196"
 HDF5 = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
 ITensors = "9136182c-28ba-11e9-034c-db9fb085ebd5"
 LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
+NDTensors = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf"
 Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
 SimpleTraits = "699a6c99-e7fa-54fc-8d76-47d257e15c1d"
 StaticArrays = "90137ffa-7385-5640-81b9-e52037218182"
 Strided = "5e0ebb24-38b0-5f93-81fe-25c709ecae67"
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
+cuTENSOR = "011b41b2-24ef-40a8-b3eb-fa098493e9e1"
 
 [compat]
-Adapt = "3.3, 3.4"
-CUDA = "3.5.0"
+Adapt = "3.5"
+CUDA = "4.0"
 Combinatorics = "1.0.2"
 Functors = "0.2, 0.3, 0.4"
 HDF5 = "0.15.7, 0.16"
-ITensors = "0.3.19"
+ITensors = "0.3.26"
 SimpleTraits = "0.9.4"
 StaticArrays = "1.2.13"
 Strided = "1.1.2"
 TimerOutputs = "0.5.13"
+cuTENSOR = "1.0.1"
 julia = "1.6"
 
 [extras]
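The compat changes above track CUDA.jl 4.0, which split the cuTENSOR wrappers out of CUDA.jl into the standalone cuTENSOR.jl package; ITensorGPU accordingly now depends on NDTensors and cuTENSOR directly. A minimal sketch of the new loading pattern and of the contraction entry point used throughout this commit — array sizes and index labels are illustrative, assuming a working CUDA 4.x install:

using CUDA      # CUDA = "4.0" in [compat]
using cuTENSOR  # standalone package as of CUDA.jl 4.0; formerly CUDA.CUTENSOR

A = CUDA.rand(Float32, 2, 3)
B = CUDA.rand(Float32, 3, 4)
C = CUDA.zeros(Float32, 2, 4)

# C[i,k] = 1*A[i,j]*B[j,k] + 0*C[i,k], contracting over the shared label 'j'.
id_op = cuTENSOR.CUTENSOR_OP_IDENTITY
cuTENSOR.contraction!(
  one(Float32), A, ['i', 'j'], id_op,
  B, ['j', 'k'], id_op,
  zero(Float32), C, ['i', 'k'], id_op,
  id_op,
)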
58 changes: 29 additions & 29 deletions ITensorGPU/src/ITensorGPU.jl
@@ -1,37 +1,26 @@
 module ITensorGPU
 
+using Adapt
 using CUDA
-using CUDA.Adapt
-using CUDA.CUTENSOR
-using CUDA.CUBLAS
-using CUDA.CUSOLVER
 using Functors
+using ITensors
 using LinearAlgebra
-using Random, Strided
-using TimerOutputs
+using NDTensors
+using Random
 using SimpleTraits
 using StaticArrays
-using ITensors
-using ITensors.NDTensors
+using Strided
+using TimerOutputs
+using cuTENSOR
 
+using NDTensors: setdata, setstorage, cpu, IsWrappedArray, parenttype
+
+import Adapt: adapt_structure
+import Base: *, permutedims!
 import CUDA: CuArray, CuMatrix, CuVector, cu
-import CUDA.CUTENSOR: cutensorContractionPlan_t, cutensorAlgo_t
-import CUDA.Adapt: adapt_structure
 import CUDA.Mem: pin
-#=
-const devs = Ref{Vector{CUDAdrv.CuDevice}}()
-const dev_rows = Ref{Int}(0)
-const dev_cols = Ref{Int}(0)
-function __init__()
-  voltas = filter(dev->occursin("V100", CUDAdrv.name(dev)), collect(CUDAdrv.devices()))
-  pascals = filter(dev->occursin("P100", CUDAdrv.name(dev)), collect(CUDAdrv.devices()))
-  devs[] = voltas[1:1]
-  #devs[] = pascals[1:2]
-  CUBLASMG.cublasMgDeviceSelect(CUBLASMG.mg_handle(), length(devs[]), devs[])
-  dev_rows[] = 1
-  dev_cols[] = 1
-end
-=#
 import ITensors:
   randn!,
   compute_contraction_labels,
@@ -50,7 +39,7 @@ import ITensors:
   permute,
   BroadcastStyle,
   Indices
-import ITensors.NDTensors:
+import NDTensors:
   Atrans,
   Btrans,
   CombinerTensor,
@@ -88,10 +77,10 @@ import ITensors.NDTensors:
   set_ndims,
   similartype,
   zero_contraction_output
+import cuTENSOR: cutensorContractionPlan_t, cutensorAlgo_t
 
-using ITensors.NDTensors: setdata, setstorage, cpu, IsWrappedArray, parenttype
-
-import Base.*, Base.permutedims!
+#const ContractionPlans = Dict{String, Tuple{cutensorAlgo_t, cutensorContractionPlan_t}}()
+const ContractionPlans = Dict{String,cutensorAlgo_t}()
 
 include("cuarray/set_types.jl")
 include("traits.jl")
@@ -105,10 +94,21 @@ include("tensor/cudiag.jl")
 include("cuitensor.jl")
 include("mps/cumps.jl")
 
-#const ContractionPlans = Dict{String, Tuple{cutensorAlgo_t, cutensorContractionPlan_t}}()
-const ContractionPlans = Dict{String,cutensorAlgo_t}()
-
 export cu,
   cpu, cuITensor, randomCuITensor, cuMPS, randomCuMPS, productCuMPS, randomCuMPO, cuMPO
 
+## TODO: Is this needed?
+## const devs = Ref{Vector{CUDAdrv.CuDevice}}()
+## const dev_rows = Ref{Int}(0)
+## const dev_cols = Ref{Int}(0)
+## function __init__()
+##   voltas = filter(dev->occursin("V100", CUDAdrv.name(dev)), collect(CUDAdrv.devices()))
+##   pascals = filter(dev->occursin("P100", CUDAdrv.name(dev)), collect(CUDAdrv.devices()))
+##   devs[] = voltas[1:1]
+##   #devs[] = pascals[1:2]
+##   CUBLASMG.cublasMgDeviceSelect(CUBLASMG.mg_handle(), length(devs[]), devs[])
+##   dev_rows[] = 1
+##   dev_cols[] = 1
+## end
+
 end #module
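Besides the switch from CUDA.CUTENSOR to the standalone cuTENSOR module, the per-contraction algorithm cache ContractionPlans is now defined before the include statements, and the commented-out multi-GPU __init__ experiment is parked at the bottom as ## comments. As a hedged illustration of the caching idea (names here are illustrative; the real lookup lives in _contract! in the files below), the cache maps a contraction's output (index, dimension) signature to the fastest cuTENSOR algorithm found so far:

using cuTENSOR

# Illustrative restatement of the ContractionPlans memoization.
const AlgoCache = Dict{String,cuTENSOR.cutensorAlgo_t}()

# Key style used in _contract!: "ind1,dim1,ind2,dim2,...".
function algo_cache_key(ctcinds, Cdims)
  key = ""
  for (ind, dim) in zip(ctcinds, Cdims)
    key *= string(ind) * "," * string(dim) * ","
  end
  return key
end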
54 changes: 27 additions & 27 deletions ITensorGPU/src/tensor/cudense.jl
@@ -261,7 +261,7 @@ function _contract!(
   for (ii, ic) in enumerate(Cinds)
     ctcinds[ii] = findfirst(x -> x == ic, ind_dict)
   end
-  id_op = CUDA.CUTENSOR.CUTENSOR.CUTENSOR_OP_IDENTITY
+  id_op = cuTENSOR.CUTENSOR_OP_IDENTITY
   dict_key = ""
   for cc in zip(ctcinds, Cdims)
     dict_key *= string(cc[1]) * "," * string(cc[2]) * ","
@@ -277,7 +277,7 @@
     dict_val = ContractionPlans[dict_key]
     algo = dict_val
     #plan = dict_val[2]
-    Cdata = CUDA.CUTENSOR.contraction!(
+    Cdata = cuTENSOR.contraction!(
       α,
       Adata,
       Vector{Char}(ctainds),
@@ -300,13 +300,13 @@
     best_plan = nothing
     best_algo = nothing
     max_algos = Ref{Int32}(C_NULL)
-    CUDA.CUTENSOR.cutensorContractionMaxAlgos(max_algos)
+    cuTENSOR.cutensorContractionMaxAlgos(max_algos)
     # fix once the other options are documented
-    #algos = collect(Cint(CUDA.CUTENSOR.CUTENSOR_ALGO_GETT):Cint(max_algos[] - 1))
-    algos = collect(Cint(CUDA.CUTENSOR.CUTENSOR_ALGO_GETT):Cint(-1))
+    #algos = collect(Cint(cuTENSOR.CUTENSOR_ALGO_GETT):Cint(max_algos[] - 1))
+    algos = collect(Cint(cuTENSOR.CUTENSOR_ALGO_GETT):Cint(-1))
     for algo in reverse(algos)
       try
-        Cdata, this_time, bytes, gctime, memallocs = @timed CUDA.CUTENSOR.contraction!(
+        Cdata, this_time, bytes, gctime, memallocs = @timed cuTENSOR.contraction!(
           α,
           Adata,
           Vector{Char}(ctainds),
@@ -319,12 +319,12 @@
           Vector{Char}(ctcinds),
          id_op,
           id_op;
-          algo=CUDA.CUTENSOR.cutensorAlgo_t(algo),
+          algo=cuTENSOR.cutensorAlgo_t(algo),
         )
         if this_time < best_time
           best_time = this_time
           #best_plan = this_plan
-          best_algo = CUDA.CUTENSOR.cutensorAlgo_t(algo)
+          best_algo = cuTENSOR.cutensorAlgo_t(algo)
         end
       catch err
         @warn "Algorithm $algo not supported"
@@ -333,7 +333,7 @@
      ContractionPlans[dict_key] = best_algo
    end
  else
-    Cdata = CUDA.CUTENSOR.contraction!(
+    Cdata = cuTENSOR.contraction!(
      α,
      Adata,
      Vector{Char}(ctainds),
@@ -352,9 +352,9 @@ end
 end
 
 function Base.:+(B::CuDenseTensor, A::CuDenseTensor)
-  opC = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opA = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opAC = CUTENSOR.CUTENSOR_OP_ADD
+  opC = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opA = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opAC = cuTENSOR.CUTENSOR_OP_ADD
   Ais = inds(A)
   Bis = inds(B)
   ind_dict = Vector{Index}()
@@ -375,7 +375,7 @@ function Base.:+(B::CuDenseTensor, A::CuDenseTensor)
   end
   ctcinds = copy(ctbinds)
   C = CUDA.zeros(eltype(Bdata), dims(Bis)...)
-  CUTENSOR.elementwiseBinary!(
+  cuTENSOR.elementwiseBinary!(
     one(eltype(Adata)),
     reshapeAdata,
     ctainds,
@@ -393,9 +393,9 @@ end
 end
 
 function Base.:+(B::CuDense, Bis::IndexSet, A::CuDense, Ais::IndexSet)
-  opA = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opC = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opAC = CUTENSOR.CUTENSOR_OP_ADD
+  opA = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opC = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opAC = cuTENSOR.CUTENSOR_OP_ADD
   ind_dict = Vector{Index}()
   for (idx, i) in enumerate(Ais)
     push!(ind_dict, i)
@@ -415,17 +415,17 @@ function Base.:+(B::CuDense, Bis::IndexSet, A::CuDense, Ais::IndexSet)
   ctcinds = copy(ctbinds)
   C = CUDA.zeros(eltype(Bdata), dims(Bis)...)
   Cis = Bis
-  C = CUTENSOR.elementwiseBinary!(
+  C = cuTENSOR.elementwiseBinary!(
     1, reshapeAdata, ctainds, opA, 1, reshapeBdata, ctbinds, opC, C, ctcinds, opAC
   )
   copyto!(data(B), vec(C))
   return C
 end
 
 function Base.:-(B::CuDenseTensor, A::CuDenseTensor)
-  opC = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opA = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opAC = CUTENSOR.CUTENSOR_OP_ADD
+  opC = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opA = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opAC = cuTENSOR.CUTENSOR_OP_ADD
   Ais = inds(A)
   Bis = inds(B)
   ind_dict = Vector{Index}()
@@ -446,7 +446,7 @@ function Base.:-(B::CuDenseTensor, A::CuDenseTensor)
   end
   ctcinds = copy(ctbinds)
   C = CUDA.zeros(eltype(Bdata), dims(Bis))
-  CUTENSOR.elementwiseBinary!(
+  cuTENSOR.elementwiseBinary!(
     -one(eltype(Adata)),
     reshapeAdata,
     ctainds,
@@ -464,9 +464,9 @@ end
 end
 
 function Base.:-(A::CuDense, Ais::IndexSet, B::CuDense, Bis::IndexSet)
-  opA = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opC = CUTENSOR.CUTENSOR_OP_IDENTITY
-  opAC = CUTENSOR.CUTENSOR_OP_ADD
+  opA = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opC = cuTENSOR.CUTENSOR_OP_IDENTITY
+  opAC = cuTENSOR.CUTENSOR_OP_ADD
   ind_dict = Vector{Index}()
   for (idx, i) in enumerate(Ais)
     push!(ind_dict, i)
@@ -486,7 +486,7 @@ function Base.:-(A::CuDense, Ais::IndexSet, B::CuDense, Bis::IndexSet)
   ctcinds = copy(ctbinds)
   C = CUDA.zeros(eltype(Bdata), dims(Bis)...)
   Cis = Bis
-  C = CUTENSOR.elementwiseBinary!(
+  C = cuTENSOR.elementwiseBinary!(
     one(eltype(Adata)),
     reshapeAdata,
     ctainds,
@@ -523,7 +523,7 @@ function Base.permute!(B::CuDenseTensor, A::CuDenseTensor)
   for (ii, ib) in enumerate(Bis)
     ctbinds[ii] = findfirst(x -> x == ib, ind_dict)
   end
-  CUDA.CUTENSOR.permutation!(
+  cuTENSOR.permutation!(
     one(eltype(Adata)),
     reshapeAdata,
     Vector{Char}(ctainds),
@@ -560,7 +560,7 @@ function Base.permute!(B::CuDense, Bis::IndexSet, A::CuDense, Ais::IndexSet)
     ctbinds[ii] = findfirst(x -> x == ib, ind_dict)
   end
 
-  CUDA.CUTENSOR.permutation!(
+  cuTENSOR.permutation!(
     one(eltype(Adata)),
     reshapeAdata,
     Vector{Char}(ctainds),
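Every change in this file swaps the CUDA.CUTENSOR (or bare CUTENSOR) module prefix for the standalone cuTENSOR module; the call signatures themselves are unchanged. For reference, a hedged, self-contained sketch of the elementwiseBinary! pattern these Base.:+ and Base.:- methods are built on — array names and labels are illustrative:

using CUDA, cuTENSOR

A = CUDA.rand(Float32, 2, 2)
B = CUDA.rand(Float32, 2, 2)
D = CUDA.zeros(Float32, 2, 2)

id_op = cuTENSOR.CUTENSOR_OP_IDENTITY

# D[i,j] = 1*A[i,j] + 1*B[i,j]; passing -one(Float32) as the first scale
# instead yields the B - A computed by the CuDenseTensor Base.:- method above.
cuTENSOR.elementwiseBinary!(
  one(Float32), A, ['i', 'j'], id_op,
  one(Float32), B, ['i', 'j'], id_op,
  D, ['i', 'j'], cuTENSOR.CUTENSOR_OP_ADD,
)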
18 changes: 9 additions & 9 deletions ITensorGPU/src/tensor/dense.jl
@@ -110,7 +110,7 @@ function _big_contract!(
   for (ii, ic) in enumerate(Cinds)
     ctcinds[ii] = findfirst(x -> x == ic, ind_dict)
   end
-  id_op = CUDA.CUTENSOR.CUTENSOR_OP_IDENTITY
+  id_op = cuTENSOR.CUTENSOR_OP_IDENTITY
   dict_key = ""
   for cc in zip(ctcinds, Cdims)
     dict_key *= string(cc[1]) * "," * string(cc[2]) * ","
@@ -126,7 +126,7 @@ function _big_contract!(
   if haskey(ContractionPlans, dict_key)
     dict_val = ContractionPlans[dict_key]
     algo = dict_val
-    Cdata = CUDA.CUTENSOR.contraction!(α, Adata, Vector{Char}(ctainds), id_op, Bdata, Vector{Char}(ctbinds), id_op, β, Cdata, Vector{Char}(ctcinds), id_op, id_op; algo=algo)
+    Cdata = cuTENSOR.contraction!(α, Adata, Vector{Char}(ctainds), id_op, Bdata, Vector{Char}(ctbinds), id_op, β, Cdata, Vector{Char}(ctcinds), id_op, id_op; algo=algo)
     synchronize()
   else
     # loop through all algos
@@ -136,17 +136,17 @@ function _big_contract!(
     best_plan = nothing
     best_algo = nothing
     max_algos = Ref{Int32}(C_NULL)
-    CUDA.CUTENSOR.cutensorContractionMaxAlgos(max_algos)
+    cuTENSOR.cutensorContractionMaxAlgos(max_algos)
     # fix once the other options are documented
-    #algos = collect(Cint(CUDA.CUTENSOR.CUTENSOR_ALGO_GETT):Cint(max_algos[] - 1))
-    algos = collect(Cint(CUDA.CUTENSOR.CUTENSOR_ALGO_GETT):Cint(-1))
+    #algos = collect(Cint(cuTENSOR.CUTENSOR_ALGO_GETT):Cint(max_algos[] - 1))
+    algos = collect(Cint(cuTENSOR.CUTENSOR_ALGO_GETT):Cint(-1))
     for algo in reverse(algos)
       try
-        Cdata, this_time, bytes, gctime, memallocs = @timed CUDA.CUTENSOR.contraction!(α, Adata, Vector{Char}(ctainds), id_op, Bdata, Vector{Char}(ctbinds), id_op, β, Cdata, Vector{Char}(ctcinds), id_op, id_op; algo=CUDA.CUTENSOR.cutensorAlgo_t(algo))
+        Cdata, this_time, bytes, gctime, memallocs = @timed cuTENSOR.contraction!(α, Adata, Vector{Char}(ctainds), id_op, Bdata, Vector{Char}(ctbinds), id_op, β, Cdata, Vector{Char}(ctcinds), id_op, id_op; algo=cuTENSOR.cutensorAlgo_t(algo))
         synchronize()
         if this_time < best_time
           best_time = this_time
-          best_algo = CUDA.CUTENSOR.cutensorAlgo_t(algo)
+          best_algo = cuTENSOR.cutensorAlgo_t(algo)
         end
       catch err
         @warn "Algorithm $algo not supported"
@@ -180,12 +180,12 @@ function _big_contract!(
   #@assert !any(isnan.(AC))
   #@assert !any(isnan.(BC))
   #@assert !any(isnan.(CC))
-  #CC = CUDA.CUTENSOR.contraction!(α, AC, ctainds, id_op, BC, ctbinds, id_op, β, CC, ctcinds, id_op, id_op)
+  #CC = cuTENSOR.contraction!(α, AC, ctainds, id_op, BC, ctbinds, id_op, β, CC, ctcinds, id_op, id_op)
   #synchronize()
   #@assert !any(isnan.(AC))
   #@assert !any(isnan.(BC))
   #@assert !any(isnan.(CC))
-  Cdata = CUDA.CUTENSOR.contraction!(
+  Cdata = cuTENSOR.contraction!(
     α, Adata, ctainds, id_op, Bdata, ctbinds, id_op, β, Cdata, ctcinds, id_op, id_op
   )
   synchronize()
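As in cudense.jl, the edits here are prefix renames, but note the synchronize() calls that follow each timed contraction: GPU kernels launch asynchronously, so without a synchronization point the @timed auto-tuning loop would mostly measure launch overhead rather than device execution time. A hedged illustration of that pattern (sizes are illustrative):

using CUDA

x = CUDA.rand(Float32, 1024, 1024)

stats = @timed begin
  y = x * x            # launches asynchronously on the device
  CUDA.synchronize()   # block until the kernel actually finishes
  y
end
stats.time  # now reflects device execution, not just the kernel launch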
4 changes: 2 additions & 2 deletions NDTensors/Project.toml
@@ -1,7 +1,7 @@
 name = "NDTensors"
 uuid = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf"
 authors = ["Matthew Fishman <mfishman@flatironinstitute.org>"]
-version = "0.1.45"
+version = "0.1.46"
 
 [deps]
 Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
@@ -19,7 +19,7 @@
 TimerOutputs = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
 TupleTools = "9d95972d-f1c8-5527-a6e0-b4b365fa01f6"
 
 [compat]
-Adapt = "3.3, 3.4"
+Adapt = "3.5"
 Compat = "2.1, 3, 4"
 Dictionaries = "0.3.5"
 Functors = "0.2, 0.3, 0.4"
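For context on the compat line: Julia [compat] entries use semver caret semantics, so "3.3, 3.4" admitted anything in [3.3.0, 4.0.0), while "3.5" raises the floor to 3.5.0. A hedged sketch of checking a bound from the REPL, assuming Pkg's (unexported) Pkg.Types.semver_spec helper, which is what Pkg itself uses to parse compat strings:

using Pkg

spec = Pkg.Types.semver_spec("3.5")  # caret semantics: [3.5.0, 4.0.0)
v"3.5.2" in spec  # true
v"3.4.0" in spec  # false
v"4.0.0" in spec  # false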
4 comments on commit e2a5726

@mtfishman (Member Author) commented:

@JuliaRegistrator register subdir=NDTensors

@JuliaRegistrator commented:

Registration pull request created: JuliaRegistries/General/77235

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a NDTensors-v0.1.46 -m "<description of version>" e2a5726d4f37eba146af246537109a0d2971f34b
git push origin NDTensors-v0.1.46

@mtfishman (Member Author) commented:

@JuliaRegistrator register

@JuliaRegistrator commented:

Registration pull request created: JuliaRegistries/General/77236

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.3.26 -m "<description of version>" e2a5726d4f37eba146af246537109a0d2971f34b
git push origin v0.3.26

Also, note the warning: Version 0.3.26 skips over 0.3.25. This can be safely ignored; if you want to fix it, call register() again after making the fix, and the pull request will be updated.
