diff --git a/.github/workflows/test_itensorgaussianmps_ubuntu.yml b/.github/workflows/test_itensorgaussianmps_ubuntu.yml index ccc165f75d..ad8e6c4f0b 100644 --- a/.github/workflows/test_itensorgaussianmps_ubuntu.yml +++ b/.github/workflows/test_itensorgaussianmps_ubuntu.yml @@ -33,13 +33,12 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia --depwarn=yes {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true) Pkg.develop(path="./NDTensors"); + Pkg.develop(path="."); Pkg.develop(path="./ITensorGaussianMPS"); - - name: Run the tests - run: > - julia --project=monorepo --depwarn=yes -e 'using Pkg; Pkg.test("ITensorGaussianMPS")' + Pkg.test("ITensorGaussianMPS"); diff --git a/.github/workflows/test_itensormps_ubuntu.yml b/.github/workflows/test_itensormps_ubuntu.yml index 6bd4faf238..cd2c654ffe 100644 --- a/.github/workflows/test_itensormps_ubuntu.yml +++ b/.github/workflows/test_itensormps_ubuntu.yml @@ -33,18 +33,15 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); - - name: Run the tests - shell: julia --project=monorepo {0} - run: | - using Pkg; + Pkg.develop(path="."); # https://github.com/JuliaLang/Pkg.jl/pull/1226 - Pkg.test("ITensors"; coverage=true, test_args=["mps"]) + Pkg.test("ITensors"; coverage=true, test_args=["mps"]); - uses: julia-actions/julia-uploadcodecov@latest env: CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} diff --git a/.github/workflows/test_itensors_base_ubuntu.yml b/.github/workflows/test_itensors_base_ubuntu.yml index c991a2dd0b..6d94b05d4f 100644 --- a/.github/workflows/test_itensors_base_ubuntu.yml +++ b/.github/workflows/test_itensors_base_ubuntu.yml @@ -33,17 +33,13 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true) Pkg.develop(path="./NDTensors"); - - name: Run the tests - shell: julia --project=monorepo {0} - run: | - using Pkg; - # https://github.com/JuliaLang/Pkg.jl/pull/1226 + Pkg.develop(path="."); Pkg.test("ITensors"; coverage=true, test_args=["base"]) - uses: julia-actions/julia-uploadcodecov@latest env: diff --git a/.github/workflows/test_itensorunicodeplots_ubuntu.yml b/.github/workflows/test_itensorunicodeplots_ubuntu.yml index 833b3026b3..d496ca45c7 100644 --- a/.github/workflows/test_itensorunicodeplots_ubuntu.yml +++ b/.github/workflows/test_itensorunicodeplots_ubuntu.yml @@ -33,14 +33,13 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia --depwarn=yes {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); + Pkg.develop(path="."); Pkg.develop(path="./ITensorVisualizationBase"); Pkg.develop(path="./ITensorUnicodePlots"); - - name: Run the tests - run: > - julia --project=monorepo --depwarn=yes -e 'using Pkg; Pkg.test("ITensorUnicodePlots")' + Pkg.test("ITensorUnicodePlots") 
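The workflow diffs above all apply the same pattern: instead of maintaining a separate `--project=monorepo` environment and a second test step, each job now activates a temporary project, `develop`s the monorepo packages it needs, and runs the tests in a single Julia invocation. A rough local equivalent (a sketch only, using ITensorGaussianMPS as the example subpackage; run from the repository root) is:

using Pkg
Pkg.activate(temp=true)                    # throwaway environment, as in the updated workflows
Pkg.develop(path="./NDTensors")            # develop the monorepo packages in dependency order
Pkg.develop(path=".")                      # ITensors itself
Pkg.develop(path="./ITensorGaussianMPS")
Pkg.test("ITensorGaussianMPS")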
diff --git a/.github/workflows/test_itensorvisualization_ubuntu.yml b/.github/workflows/test_itensorvisualization_ubuntu.yml index be6585c8cf..0c5ac112bc 100644 --- a/.github/workflows/test_itensorvisualization_ubuntu.yml +++ b/.github/workflows/test_itensorvisualization_ubuntu.yml @@ -33,13 +33,12 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia --depwarn=yes {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); + Pkg.develop(path="."); Pkg.develop(path="./ITensorVisualizationBase"); - - name: Run the tests - run: | - julia --project=monorepo --depwarn=yes -e 'using Pkg; Pkg.test("ITensorVisualizationBase")' + Pkg.test("ITensorVisualizationBase") diff --git a/.github/workflows/test_ndtensors.yml b/.github/workflows/test_ndtensors.yml index 9109345bd5..fcbc39b2b1 100644 --- a/.github/workflows/test_ndtensors.yml +++ b/.github/workflows/test_ndtensors.yml @@ -30,12 +30,11 @@ jobs: with: version: ${{ matrix.version }} arch: ${{ matrix.arch }} - - name: Install Julia dependencies - shell: julia --project=monorepo {0} + - name: Install Julia dependencies and run tests + shell: julia --depwarn=yes {0} run: | using Pkg; - Pkg.develop(path="."); + Pkg.activate(temp=true); Pkg.develop(path="./NDTensors"); - - name: Run the tests - run: | - julia --project=monorepo --depwarn=yes -e 'using Pkg; Pkg.test("NDTensors")' + Pkg.develop(path="."); + Pkg.test("NDTensors"); diff --git a/NDTensors/Project.toml b/NDTensors/Project.toml index d7e584589c..6fb8fc41d7 100644 --- a/NDTensors/Project.toml +++ b/NDTensors/Project.toml @@ -1,7 +1,7 @@ name = "NDTensors" uuid = "23ae76d9-e61a-49c4-8f12-3f1a16adf9cf" authors = ["Matthew Fishman "] -version = "0.2.30" +version = "0.3.0" [deps] Accessors = "7d9f7c33-5ae7-4f3b-8dc6-eff91059b697" diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl b/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl index 76fa1b065c..34004a7b7f 100644 --- a/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl +++ b/NDTensors/ext/NDTensorsAMDGPUExt/NDTensorsAMDGPUExt.jl @@ -1,5 +1,6 @@ module NDTensorsAMDGPUExt +include("append.jl") include("copyto.jl") include("set_types.jl") include("adapt.jl") diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl b/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl index 9e4a25c7a7..8ef943d674 100644 --- a/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl +++ b/NDTensors/ext/NDTensorsAMDGPUExt/adapt.jl @@ -22,7 +22,7 @@ function Adapt.adapt_storage(adaptor::ROCArrayAdaptor, xs::AbstractArray) end function NDTensors.adapt_storagetype( - adaptor::ROCArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}} + adaptor::ROCArrayAdaptor, ::Type{EmptyStorage{ElT,StoreT}} ) where {ElT,StoreT} roctype = set_type_parameters( ROCVector, (eltype, storagemode), (ElT, storagemode(adaptor)) diff --git a/NDTensors/ext/NDTensorsAMDGPUExt/append.jl b/NDTensors/ext/NDTensorsAMDGPUExt/append.jl new file mode 100644 index 0000000000..e84d8b064d --- /dev/null +++ b/NDTensors/ext/NDTensorsAMDGPUExt/append.jl @@ -0,0 +1,8 @@ +using GPUArraysCore: @allowscalar +using AMDGPU: ROCArray +using NDTensors.Expose: Exposed, unexpose + +## Warning this append function uses scalar indexing and is therefore extremely slow +function Base.append!(Ecollection::Exposed{<:ROCArray}, collections...) + return @allowscalar append!(unexpose(Ecollection), collections...) 
+end diff --git a/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl b/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl index f53193bc5b..2a7a458ff9 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/NDTensorsCUDAExt.jl @@ -1,15 +1,5 @@ module NDTensorsCUDAExt - -using NDTensors -using NDTensors.Expose -using Adapt -using Functors -using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul!, svd -using CUDA -using CUDA.CUBLAS -using CUDA.CUSOLVER - -include("imports.jl") +include("append.jl") include("default_kwargs.jl") include("copyto.jl") include("set_types.jl") diff --git a/NDTensors/ext/NDTensorsCUDAExt/adapt.jl b/NDTensors/ext/NDTensorsCUDAExt/adapt.jl index a4333d6f28..c47a9408be 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/adapt.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/adapt.jl @@ -1,24 +1,26 @@ -using NDTensors.TypeParameterAccessors: TypeParameterAccessors -using NDTensors.GPUArraysCoreExtensions: storagemode +using Adapt: Adapt +using CUDA: CUDA, CuArray, CuVector +using Functors: fmap +using NDTensors: NDTensors, EmptyStorage, adapt_storagetype, emptytype using NDTensors.CUDAExtensions: CUDAExtensions, CuArrayAdaptor +using NDTensors.GPUArraysCoreExtensions: storagemode +using NDTensors.TypeParameterAccessors: + default_type_parameter, set_type_parameters, type_parameters -## TODO make this work for unified. This works but overwrites CUDA's adapt_storage. This fails for emptystorage... -function CUDAExtensions.cu(xs; unified::Bool=false) - return fmap( - x -> adapt(CuArrayAdaptor{unified ? Mem.UnifiedBuffer : Mem.DeviceBuffer}(), x), xs - ) +function CUDAExtensions.cu(xs; storagemode=default_type_parameter(CuArray, storagemode)) + return fmap(x -> adapt(CuArrayAdaptor{storagemode}(), x), xs) end +## Could do this generically function Adapt.adapt_storage(adaptor::CuArrayAdaptor, xs::AbstractArray) - ElT = eltype(xs) - BufT = storagemode(adaptor) - N = ndims(xs) - return isbits(xs) ? xs : adapt(CuArray{ElT,N,BufT}, xs) + params = (type_parameters(xs, (eltype, ndims))..., storagemode(adaptor)) + cutype = set_type_parameters(CuArray, (eltype, ndims, storagemode), params) + return isbits(xs) ? xs : adapt(cutype, xs) end function NDTensors.adapt_storagetype( - adaptor::CuArrayAdaptor, xs::Type{EmptyStorage{ElT,StoreT}} + adaptor::CuArrayAdaptor, ::Type{EmptyStorage{ElT,StoreT}} ) where {ElT,StoreT} - BufT = storagemode(adaptor) - return NDTensors.emptytype(NDTensors.adapt_storagetype(CuVector{ElT,BufT}, StoreT)) + cutype = set_type_parameters(CuVector, (eltype, storagemode), (ElT, storagemode(adaptor))) + return emptytype(adapt_storagetype(cutype, StoreT)) end diff --git a/NDTensors/ext/NDTensorsCUDAExt/append.jl b/NDTensors/ext/NDTensorsCUDAExt/append.jl new file mode 100644 index 0000000000..48470e5131 --- /dev/null +++ b/NDTensors/ext/NDTensorsCUDAExt/append.jl @@ -0,0 +1,8 @@ +using GPUArraysCore: @allowscalar +using CUDA: CuArray +using NDTensors.Expose: Exposed, unexpose + +## Warning this append function uses scalar indexing and is therefore extremely slow +function Base.append!(Ecollection::Exposed{<:CuArray}, collections...) + return @allowscalar append!(unexpose(Ecollection), collections...) 
+end diff --git a/NDTensors/ext/NDTensorsCUDAExt/copyto.jl b/NDTensors/ext/NDTensorsCUDAExt/copyto.jl index 73867192c4..c3f136a9a6 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/copyto.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/copyto.jl @@ -1,3 +1,7 @@ +using CUDA: CuArray +using NDTensors.Expose: Exposed, expose, unexpose +using LinearAlgebra: Adjoint + # Same definition as `MtlArray`. function Base.copy(src::Exposed{<:CuArray,<:Base.ReshapedArray}) return reshape(copy(parent(src)), size(unexpose(src))) diff --git a/NDTensors/ext/NDTensorsCUDAExt/default_kwargs.jl b/NDTensors/ext/NDTensorsCUDAExt/default_kwargs.jl index 382185b36f..180e2143e3 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/default_kwargs.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/default_kwargs.jl @@ -1 +1,4 @@ +using CUDA: CuArray +using NDTensors: NDTensors + NDTensors.default_svd_alg(::Type{<:CuArray}, a) = "qr_algorithm" diff --git a/NDTensors/ext/NDTensorsCUDAExt/imports.jl b/NDTensors/ext/NDTensorsCUDAExt/imports.jl deleted file mode 100644 index df9f148d6a..0000000000 --- a/NDTensors/ext/NDTensorsCUDAExt/imports.jl +++ /dev/null @@ -1,3 +0,0 @@ -import NDTensors: similartype -import NDTensors: - ContractionProperties, _contract!, GemmBackend, auto_select_backend, _gemm!, iscu diff --git a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl index 263e2442a6..ac86e18da8 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/indexing.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/indexing.jl @@ -1,9 +1,14 @@ +using CUDA: CuArray +using GPUArraysCore: @allowscalar +using NDTensors: NDTensors +using NDTensors.Expose: Exposed, expose, unexpose + function Base.getindex(E::Exposed{<:CuArray}) - return CUDA.@allowscalar unexpose(E)[] + return @allowscalar unexpose(E)[] end function Base.setindex!(E::Exposed{<:CuArray}, x::Number) - CUDA.@allowscalar unexpose(E)[] = x + @allowscalar unexpose(E)[] = x return unexpose(E) end diff --git a/NDTensors/ext/NDTensorsCUDAExt/iscu.jl b/NDTensors/ext/NDTensorsCUDAExt/iscu.jl index 5c3fc95a25..c0c7f30fa4 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/iscu.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/iscu.jl @@ -1 +1,4 @@ -iscu(::Type{<:CuArray}) = true +using CUDA: CuArray +using NDTensors: NDTensors + +NDTensors.iscu(::Type{<:CuArray}) = true diff --git a/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl b/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl index 4781386ea2..f76841e135 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/linearalgebra.jl @@ -1,3 +1,10 @@ +using Adapt: adapt +using CUDA: CUDA, CuMatrix +using LinearAlgebra: Adjoint, svd +using NDTensors: NDTensors +using NDTensors.Expose: Expose, expose, ql, ql_positive +using NDTensors.GPUArraysCoreExtensions: cpu +using NDTensors.TypeParameterAccessors: unwrap_array_type function NDTensors.svd_catch_error(A::CuMatrix; alg::String="jacobi_algorithm") if alg == "jacobi_algorithm" alg = CUDA.CUSOLVER.JacobiAlgorithm() @@ -42,9 +49,6 @@ function NDTensors.svd_catch_error(A::CuMatrix, ::CUDA.CUSOLVER.QRAlgorithm) return USV end -using NDTensors.GPUArraysCoreExtensions: cpu -using NDTensors.Expose: Expose, expose, ql, ql_positive -using NDTensors.TypeParameterAccessors: unwrap_array_type ## TODO currently AMDGPU doesn't have ql so make a ql function function Expose.ql(A::Exposed{<:CuMatrix}) Q, L = ql(expose(cpu(A))) diff --git a/NDTensors/ext/NDTensorsCUDAExt/mul.jl b/NDTensors/ext/NDTensorsCUDAExt/mul.jl index b3751b5fc1..624e20aaad 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/mul.jl 
+++ b/NDTensors/ext/NDTensorsCUDAExt/mul.jl @@ -1,3 +1,7 @@ +using CUDA: CuArray +using LinearAlgebra: LinearAlgebra, mul!, transpose +using NDTensors.Expose: Exposed, expose, unexpose + # This was calling generic matrix multiplication. # TODO: Raise an issue with `CUDA.jl`. function LinearAlgebra.mul!( diff --git a/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl b/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl index d482841e27..032c55c40a 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/permutedims.jl @@ -1,3 +1,6 @@ +using CUDA: CuArray +using NDTensors.Expose: Exposed, expose, unexpose + function Base.permutedims!( Edest::Exposed{<:CuArray,<:Base.ReshapedArray}, Esrc::Exposed{<:CuArray}, perm ) diff --git a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl index 3b0d7a592a..5c3d52a141 100644 --- a/NDTensors/ext/NDTensorsCUDAExt/set_types.jl +++ b/NDTensors/ext/NDTensorsCUDAExt/set_types.jl @@ -1,4 +1,5 @@ # TypeParameterAccessors definitions +using CUDA: CUDA, CuArray using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position using NDTensors.GPUArraysCoreExtensions: storagemode diff --git a/NDTensors/ext/NDTensorsMetalExt/NDTensorsMetalExt.jl b/NDTensors/ext/NDTensorsMetalExt/NDTensorsMetalExt.jl index 320b417a5f..8cca86ca2a 100644 --- a/NDTensors/ext/NDTensorsMetalExt/NDTensorsMetalExt.jl +++ b/NDTensors/ext/NDTensorsMetalExt/NDTensorsMetalExt.jl @@ -1,14 +1,5 @@ module NDTensorsMetalExt -using Adapt -using Functors -using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul!, qr, eigen, svd -using NDTensors -using NDTensors.Expose: qr_positive, ql_positive, ql - -using Metal - -include("imports.jl") include("adapt.jl") include("set_types.jl") include("indexing.jl") diff --git a/NDTensors/ext/NDTensorsMetalExt/adapt.jl b/NDTensors/ext/NDTensorsMetalExt/adapt.jl index 60c018b7c7..5034c71b29 100644 --- a/NDTensors/ext/NDTensorsMetalExt/adapt.jl +++ b/NDTensors/ext/NDTensorsMetalExt/adapt.jl @@ -1,17 +1,29 @@ -using NDTensors.MetalExtensions: MetalExtensions -using NDTensors.GPUArraysCoreExtensions: GPUArraysCoreExtensions, set_storagemode -using NDTensors.TypeParameterAccessors: specify_type_parameters, type_parameters +using Adapt: Adapt, adapt +using Functors: fmap +using Metal: MtlArray, MtlVector, DefaultStorageMode +using NDTensors: NDTensors, EmptyStorage, adapt_storagetype, emptytype +using NDTensors.Expose: Exposed +using NDTensors.MetalExtensions: MetalExtensions, MtlArrayAdaptor +using NDTensors.GPUArraysCoreExtensions: GPUArraysCoreExtensions +using NDTensors.TypeParameterAccessors: set_type_parameters, type_parameters GPUArraysCoreExtensions.cpu(e::Exposed{<:MtlArray}) = adapt(Array, e) -function MetalExtensions.mtl(xs; storage=DefaultStorageMode) - return adapt(set_storagemode(MtlArray, storage), xs) +function MetalExtensions.mtl(xs; storagemode=DefaultStorageMode) + return fmap(x -> adapt(MtlArrayAdaptor{storagemode}(), x), xs) end -# More general than the version in Metal.jl -## TODO Rewrite this using a custom `MtlArrayAdaptor` which will be written in `MetalExtensions`. -function Adapt.adapt_storage(arraytype::Type{<:MtlArray}, xs::AbstractArray) - params = type_parameters(xs) - arraytype_specified = specify_type_parameters(arraytype, params) - return isbitstype(typeof(xs)) ? 
xs : convert(arraytype_specified, xs)
+function Adapt.adapt_storage(adaptor::MtlArrayAdaptor, xs::AbstractArray)
+  new_parameters = (type_parameters(xs, (eltype, ndims))..., storagemode(adaptor))
+  mtltype = set_type_parameters(MtlArray, (eltype, ndims, storagemode), new_parameters)
+  return isbits(xs) ? xs : adapt(mtltype, xs)
+end
+
+function NDTensors.adapt_storagetype(
+  adaptor::MtlArrayAdaptor, ::Type{EmptyStorage{ElT,StoreT}}
+) where {ElT,StoreT}
+  mtltype = set_type_parameters(
+    MtlVector, (eltype, storagemode), (ElT, storagemode(adaptor))
+  )
+  return emptytype(adapt_storagetype(mtltype, StoreT))
 end
diff --git a/NDTensors/ext/NDTensorsMetalExt/append.jl b/NDTensors/ext/NDTensorsMetalExt/append.jl
index 7487004f2b..b48d2cfd4f 100644
--- a/NDTensors/ext/NDTensorsMetalExt/append.jl
+++ b/NDTensors/ext/NDTensorsMetalExt/append.jl
@@ -1,5 +1,10 @@
-# This circumvents an issues that `MtlArray` can't call `resize!`.
-# TODO: Raise an issue with Metal.jl.
-function NDTensors.append!!(::Type{<:MtlArray}, collection, collections...)
-  return vcat(collection, collections...)
+## Right now `append!` is broken on Metal because of a missing `resize!` function,
+## but `resize!` should be in the next Metal release; defining this now lets Metal work once it lands.
+using GPUArraysCore: @allowscalar
+using Metal: MtlArray
+using NDTensors.Expose: Exposed, unexpose
+
+## Warning: this append function uses scalar indexing and is therefore extremely slow
+function Base.append!(Ecollection::Exposed{<:MtlArray}, collections...)
+  return @allowscalar append!(unexpose(Ecollection), collections...)
 end
diff --git a/NDTensors/ext/NDTensorsMetalExt/copyto.jl b/NDTensors/ext/NDTensorsMetalExt/copyto.jl
index c32b5f2f01..6c7aeb4b3c 100644
--- a/NDTensors/ext/NDTensorsMetalExt/copyto.jl
+++ b/NDTensors/ext/NDTensorsMetalExt/copyto.jl
@@ -1,3 +1,6 @@
+using Metal: MtlArray
+using NDTensors.Expose: Exposed, expose, unexpose
+
 function Base.copy(src::Exposed{<:MtlArray,<:Base.ReshapedArray})
   return reshape(copy(parent(src)), size(unexpose(src)))
 end
diff --git a/NDTensors/ext/NDTensorsMetalExt/imports.jl b/NDTensors/ext/NDTensorsMetalExt/imports.jl
deleted file mode 100644
index 48027dcefc..0000000000
--- a/NDTensors/ext/NDTensorsMetalExt/imports.jl
+++ /dev/null
@@ -1,3 +0,0 @@
-using NDTensors.Expose: Exposed, unexpose, expose
-using Metal: DefaultStorageMode
-using NDTensors: adapt
diff --git a/NDTensors/ext/NDTensorsMetalExt/indexing.jl b/NDTensors/ext/NDTensorsMetalExt/indexing.jl
index a682088c8a..8a37e44e05 100644
--- a/NDTensors/ext/NDTensorsMetalExt/indexing.jl
+++ b/NDTensors/ext/NDTensorsMetalExt/indexing.jl
@@ -1,9 +1,14 @@
+using Metal: MtlArray
+using GPUArraysCore: @allowscalar
+using LinearAlgebra: Adjoint
+using NDTensors.Expose: Exposed, expose, unexpose
+
 function Base.getindex(E::Exposed{<:MtlArray})
-  return Metal.@allowscalar unexpose(E)[]
+  return @allowscalar unexpose(E)[]
 end

 function Base.setindex!(E::Exposed{<:MtlArray}, x::Number)
-  Metal.@allowscalar unexpose(E)[] = x
+  @allowscalar unexpose(E)[] = x
   return unexpose(E)
 end
diff --git a/NDTensors/ext/NDTensorsMetalExt/linearalgebra.jl b/NDTensors/ext/NDTensorsMetalExt/linearalgebra.jl
index 982c10d40d..28d592506a 100644
--- a/NDTensors/ext/NDTensorsMetalExt/linearalgebra.jl
+++ b/NDTensors/ext/NDTensorsMetalExt/linearalgebra.jl
@@ -1,3 +1,6 @@
+using Metal: MtlMatrix
+using LinearAlgebra: LinearAlgebra, qr, eigen, svd
+using NDTensors.Expose: qr_positive, ql_positive, ql
 using NDTensors.TypeParameterAccessors: set_type_parameters, type_parameters,
unwrap_array_type diff --git a/NDTensors/ext/NDTensorsMetalExt/mul.jl b/NDTensors/ext/NDTensorsMetalExt/mul.jl index 2abf8e8cbf..b6e13d9e74 100644 --- a/NDTensors/ext/NDTensorsMetalExt/mul.jl +++ b/NDTensors/ext/NDTensorsMetalExt/mul.jl @@ -1,3 +1,5 @@ +using Metal: MtlArray +using LinearAlgebra: LinearAlgebra, Adjoint, Transpose, mul! # This was calling generic matrix multiplication. # TODO: Raise an issue with `Metal.jl`. function LinearAlgebra.mul!( diff --git a/NDTensors/ext/NDTensorsMetalExt/permutedims.jl b/NDTensors/ext/NDTensorsMetalExt/permutedims.jl index 40cc3f588c..0480103564 100644 --- a/NDTensors/ext/NDTensorsMetalExt/permutedims.jl +++ b/NDTensors/ext/NDTensorsMetalExt/permutedims.jl @@ -1,3 +1,5 @@ +using Metal: MtlArray +using NDTensors.Expose: Exposed, expose, unexpose ## Theres an issue in metal that `ReshapedArray' wrapped arrays cannot be permuted using ## permutedims (failing in that Metal uses scalar indexing) ## These functions are to address the problem in different instances of permutedims diff --git a/NDTensors/ext/NDTensorsMetalExt/set_types.jl b/NDTensors/ext/NDTensorsMetalExt/set_types.jl index 98c540c397..72cdbd607b 100644 --- a/NDTensors/ext/NDTensorsMetalExt/set_types.jl +++ b/NDTensors/ext/NDTensorsMetalExt/set_types.jl @@ -1,6 +1,7 @@ +using Metal: Metal, MtlArray # `TypeParameterAccessors.jl` definitions. -using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position, set_type_parameter +using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position using NDTensors.GPUArraysCoreExtensions: storagemode ## TODO remove TypeParameterAccessors when SetParameters is removed diff --git a/NDTensors/ext/examples/NDTensorCUDA.jl b/NDTensors/ext/examples/NDTensorCUDA.jl deleted file mode 100644 index e7dcd77f18..0000000000 --- a/NDTensors/ext/examples/NDTensorCUDA.jl +++ /dev/null @@ -1,115 +0,0 @@ -using NDTensors -using NDTensors.CUDAExtensions: cu -using CUDA: CUDA, CuVector, reshape -using ITensors: - Index, ITensor, randomMPO, randomMPS, inner, orthogonalize, qr, siteinds, svd -using Test: @test -using Zygote: gradient - -function main() - # using ITensorGPU - cpu = NDTensors.cpu - gpu = cu - # Here is an example of how to utilize NDTensors based tensors with CUDA datatypes - i = Index(2) - j = Index(5) - k = Index(3) - l = Index(6) - - dim1 = (i, j, l) - dim2 = (j, k) - - # Create 2 ITensors with CUDA backends (These will be made simpiler by randomITensor(CuVector) soon) - A = ITensor(randomTensor(CuVector, dim1)) - B = ITensor(randomTensor(CuVector, dim2)) - # Contract the two tensors - C = A * B - A = cpu(A) - B = cpu(B) - @test cpu(C) ≈ A * B - @test eltype(C) == Float64 - - # Create 2 ITensors on CPU with different eltypes - A = ITensor(Float32, dim1) - B = ITensor(Float64, dim2) - - fill!(A, randn()) - fill!(B, randn()) - - # Convert the ITensors to GPU - cA = gpu(A) - cB = gpu(B) - - #Check that backend of contraction is GPU - @test A * A ≈ cpu(cA * cA) - @test B * B ≈ cpu(cB * cB) - @test A * B ≈ cpu(cA * cB) - @test B * A ≈ cpu(cB * cA) - - dim3 = (l, k) - dim4 = (i,) - cC = ITensor(randomTensor(CuVector{Float64,CUDA.Mem.DeviceBuffer}, dim3)) - cD = ITensor(Tensor(CuVector{Float32}, dim4)) - fill!(cD, randn()) - - # Create a function of 4 tensors on GPU - f(A, B, C, D) = (A * B * C * D)[] - - #Use Zygote to take the gradient of the four tensors on GPU - #Currently this code fails with CUDA.allowscalar(false) - # Because of outer calling the _gemm! 
function which calls a - # generic implementation - grad = gradient(f, cA, cB, cC, cD) - @test cpu(cB * cC * cD) ≈ cpu(grad[1]) - @test (cB * cC * cD) ≈ grad[1] - # Create a tuple of indices - dims = size(grad[1]) - decomp = (dims[1], dims[2] * dims[3]) - # Reshape the CuVector of data into a matrix - cuTensor_data = reshape(array(grad[1]), decomp) - # Use cuBLAS to compute SVD of data - U, S, V = svd(cuTensor_data) - decomp = size(array(grad[2])) - cuTensor_data = reshape(array(grad[2]), decomp) - U, S, V = svd(cuTensor_data) - - # These things can take up lots of memory, look at memory usage here - cuTensor_data = U = S = V = nothing - GC.gc() - CUDA.memory_status() - - # Get rid of the gradients and clean the CUDA memory - CUDA.reclaim() - CUDA.memory_status() - - # Its possible to compute QR of GPU tensor - cq = qr(cA, (i,), (j, l)) - A ≈ cpu(cq[1]) * cpu(cq[2]) - - ## SVD does not yet work with CUDA backend, see above on - ## Converting ITensors to vectors and calling CUDA svd function - ## CuVectors... - #ITensors.svd(A, (i,), (j, l)) - - s = siteinds("S=1/2", 8) - m = randomMPS(s; linkdims=4) - cm = gpu(m) - - @test inner(cm', cm) ≈ inner(m', m) - - H = randomMPO(s) - cH = gpu(H) - @test inner(cm', cH, cm) ≈ inner(m', H, m) - - m = orthogonalize(m, 1) - cm = gpu(orthogonalize(cm, 1)) - @test inner(m', m) ≈ inner(cm', cm) - - H = orthogonalize(H, 1) - cH = gpu(cH) - - @test inner(cm', cH, cm) ≈ inner(m', H, m) -end - -## running the main function with Float64 -main() diff --git a/NDTensors/ext/examples/NDTensorMetal.jl b/NDTensors/ext/examples/NDTensorMetal.jl deleted file mode 100644 index 6f7c2e44b1..0000000000 --- a/NDTensors/ext/examples/NDTensorMetal.jl +++ /dev/null @@ -1,43 +0,0 @@ -using Metal: MtlVector -using NDTensors -using NDTensors.MetalExtensions: mtl - -using ITensors: ITensor, Index, randomITensor -using Test: @test -using Zygote: gradient - -function main() - cpu = NDTensors.cpu - gpu = mtl - # Here is an example of how to utilize NDTensors based tensors with CUDA datatypes - i = Index(20) - j = Index(5) - k = Index(78) - l = Index(62) - - dim1 = (i, j, l) - dim2 = (j, k) - - ## MtlArrays only support Float32 arithmatic - cA = ITensor(randomTensor(MtlVector{Float32}, dim1)) - cB = ITensor(randomTensor(MtlVector{Float32}, dim2)) - cC = cA * cB - - A = cpu(cA) - B = cpu(cB) - - @test A * B ≈ cpu(cC) - - dim3 = (l, k) - dim4 = (i,) - - cC = gpu(randomITensor(Float32, dim3)) - cD = gpu(randomITensor(Float32, dim4)) - - f(A, B, C, D) = (A * B * C * D)[] - - grad = gradient(f, cA, cB, cC, cD) - @test grad[2] ≈ cA * cC * cD -end - -main() diff --git a/NDTensors/ext/examples/Readme.md b/NDTensors/ext/examples/Readme.md deleted file mode 100644 index a652dbec4a..0000000000 --- a/NDTensors/ext/examples/Readme.md +++ /dev/null @@ -1,10 +0,0 @@ -In this file I will layout how to construct a new backend NDTensor datatype storage system and what is required of the datatype. - -One should start by constructing a folder designated `NDTensor___` where ___ is replaced with the name of the datatype. In this folder one creates an `NDTensor___.jl` file. This is the only file which should link `NDTensors` to your new datatype. In this file, one can import all necessary packages for your datatype. These packages should NOT be added to the `Project.toml`. - -## TODO This is no longer necessary, should it be deleted? -The most necessary portion is the `set_type.jl` file. In this file, one must define `set_eltype` and `set_ndims`. 
These functions allow our adapt system properly interperet the datatype and construct new tensors with that datatype. Be sure to (import and) define these functions as `NDTensors.set_eltype` and `NDTensors.set_ndims`. - -In general, I have found that Adapt functions (like `cu` or `mtl`) do work properly with ITensors and NDTensors. However, we have decided to construct our own versions of these functions because the ones constructed by `CUDA.jl` and `Metal.jl` do not preserve the element type of `Numbers` and convert all numbers, bar `Float16`, to `Float32`. It is possible define ones own adapt functions by utilizing the `adapt_structure` function defined in `NDTensors`. - -To perform linear algebra operations, like matrix-matrix multiplication or orthogonalization procedures, `NDTensors` depends on reliable definitions of functions like `mul!` and `svd`, etc. For your datatype to utilize these operations, these generic functions must be defined and linked to a proper definition of BLAS/LAPACK. \ No newline at end of file diff --git a/NDTensors/src/NDTensors.jl b/NDTensors/src/NDTensors.jl index 0d75e68fe1..7e7cffb8dc 100644 --- a/NDTensors/src/NDTensors.jl +++ b/NDTensors/src/NDTensors.jl @@ -15,9 +15,8 @@ include("abstractarray/to_shape.jl") include("abstractarray/iscu.jl") include("abstractarray/similar.jl") include("abstractarray/mul.jl") -include("abstractarray/append.jl") include("abstractarray/permutedims.jl") -include("abstractarray/fill.jl") +include("abstractarray/generic_array_constructors.jl") include("array/permutedims.jl") include("array/mul.jl") include("tupletools.jl") @@ -45,7 +44,7 @@ include("dense/tensoralgebra/contract.jl") include("dense/linearalgebra/decompositions.jl") include("dense/tensoralgebra/outer.jl") include("dense/set_types.jl") -include("dense/fill.jl") +include("dense/generic_array_constructors.jl") include("linearalgebra/symmetric.jl") include("linearalgebra/linearalgebra.jl") include("diag/diag.jl") diff --git a/NDTensors/src/abstractarray/append.jl b/NDTensors/src/abstractarray/append.jl deleted file mode 100644 index 76d00ed474..0000000000 --- a/NDTensors/src/abstractarray/append.jl +++ /dev/null @@ -1,12 +0,0 @@ -using .TypeParameterAccessors: unwrap_array_type -# NDTensors.append! -# Used to circumvent issues with some GPU backends like Metal -# not supporting `resize!`. -# TODO: Change this over to use `expose`. -function append!!(collection, collections...) - return append!!(unwrap_array_type(collection), collection, collections...) -end - -function append!!(::Type, collection, collections...) - return append!(collection, collections...) -end diff --git a/NDTensors/src/abstractarray/fill.jl b/NDTensors/src/abstractarray/fill.jl deleted file mode 100644 index 56d3730784..0000000000 --- a/NDTensors/src/abstractarray/fill.jl +++ /dev/null @@ -1,15 +0,0 @@ -using .TypeParameterAccessors: unwrap_array_type, specify_default_type_parameters - -function generic_randn( - arraytype::Type{<:AbstractArray}, dim::Integer=0; rng=Random.default_rng() -) - arraytype_specified = specify_default_type_parameters(unwrap_array_type(arraytype)) - data = similar(arraytype_specified, dim) - return randn!(rng, data) -end - -function generic_zeros(arraytype::Type{<:AbstractArray}, dims...) 
- arraytype_specified = specify_default_type_parameters(unwrap_array_type(arraytype)) - ElT = eltype(arraytype_specified) - return fill!(similar(arraytype_specified, dims...), zero(ElT)) -end diff --git a/NDTensors/src/abstractarray/generic_array_constructors.jl b/NDTensors/src/abstractarray/generic_array_constructors.jl new file mode 100644 index 0000000000..13d142980b --- /dev/null +++ b/NDTensors/src/abstractarray/generic_array_constructors.jl @@ -0,0 +1,36 @@ +using .TypeParameterAccessors: + unwrap_array_type, specify_default_type_parameters, type_parameter + +## Warning to use these functions it is necessary to define `TypeParameterAccessors.position(::Type{<:YourArrayType}, ::typeof(ndims)))` +# Implementation, catches if `ndims(arraytype) != length(dims)`. +## TODO convert ndims to `type_parameter(::, typeof(ndims))` +function generic_randn(arraytype::Type{<:AbstractArray}, dims...; rng=Random.default_rng()) + arraytype_specified = specify_type_parameter( + unwrap_array_type(arraytype), ndims, length(dims) + ) + arraytype_specified = specify_default_type_parameters(arraytype_specified) + @assert length(dims) == ndims(arraytype_specified) + data = similar(arraytype_specified, dims...) + return randn!(rng, data) +end + +function generic_randn( + arraytype::Type{<:AbstractArray}, dims::Tuple; rng=Random.default_rng() +) + return generic_randn(arraytype, dims...; rng) +end + +# Implementation, catches if `ndims(arraytype) != length(dims)`. +function generic_zeros(arraytype::Type{<:AbstractArray}, dims...) + arraytype_specified = specify_type_parameter( + unwrap_array_type(arraytype), ndims, length(dims) + ) + arraytype_specified = specify_default_type_parameters(arraytype_specified) + @assert length(dims) == ndims(arraytype_specified) + ElT = eltype(arraytype_specified) + return fill!(similar(arraytype_specified, dims...), zero(ElT)) +end + +function generic_zeros(arraytype::Type{<:AbstractArray}, dims::Tuple) + return generic_zeros(arraytype, dims...) +end diff --git a/NDTensors/src/blocksparse/blocksparsetensor.jl b/NDTensors/src/blocksparse/blocksparsetensor.jl index 788f7a2c09..6ff06ab0ca 100644 --- a/NDTensors/src/blocksparse/blocksparsetensor.jl +++ b/NDTensors/src/blocksparse/blocksparsetensor.jl @@ -256,6 +256,7 @@ end # Returns the offset of the new block added. # XXX rename to insertblock!, no need to return offset using .TypeParameterAccessors: unwrap_array_type +using .Expose: expose function insertblock_offset!(T::BlockSparseTensor{ElT,N}, newblock::Block{N}) where {ElT,N} newdim = blockdim(T, newblock) newoffset = nnz(T) @@ -264,7 +265,7 @@ function insertblock_offset!(T::BlockSparseTensor{ElT,N}, newblock::Block{N}) wh new_data = generic_zeros(unwrap_array_type(T), newdim) # TODO: `append!` is broken on `Metal` since `resize!` # isn't implemented. - append!(data(T), new_data) + append!(expose(data(T)), new_data) return newoffset end @@ -725,6 +726,7 @@ end # permfactor(perm, block, inds) = 1 +using .TypeParameterAccessors: set_type_parameters, parenttype function permutedims!( R::BlockSparseTensor{<:Number,N}, T::BlockSparseTensor{<:Number,N}, @@ -751,17 +753,20 @@ function permutedims!( # Rblock doesn't exist block_size = permute(size(Tblock), perm) # TODO: Make GPU friendly. 
- Rblock = tensor(Dense(zeros(eltype(R), block_size)), block_size) + DenseT = set_type_parameters(Dense, (eltype, parenttype), (eltype(R), datatype(R))) + Rblock = tensor(generic_zeros(DenseT, prod(block_size)), block_size) elseif !Tblock_exists # Tblock doesn't exist block_size = permute(size(Rblock), invperm(perm)) # TODO: Make GPU friendly. - Tblock = tensor(Dense(zeros(eltype(T), block_size)), block_size) + DenseT = set_type_parameters(Dense, (eltype, parenttype), (eltype(T), datatype(T))) + Tblock = tensor(generic_zeros(DenseT, prod(block_size)), block_size) end permutedims!(Rblock, Tblock, perm, f_fac) if !Rblock_exists # Set missing nonzero block - if !iszero(Rblock) + ## To make sure no allowscalar issue grab the data + if !iszero(data(Rblock)) R[block] = Rblock end end diff --git a/NDTensors/src/blocksparse/linearalgebra.jl b/NDTensors/src/blocksparse/linearalgebra.jl index 17b5eac41b..23e332ba4c 100644 --- a/NDTensors/src/blocksparse/linearalgebra.jl +++ b/NDTensors/src/blocksparse/linearalgebra.jl @@ -1,4 +1,5 @@ using .TypeParameterAccessors: unwrap_array_type +using .Expose: expose const BlockSparseMatrix{ElT,StoreT,IndsT} = BlockSparseTensor{ElT,2,StoreT,IndsT} const DiagBlockSparseMatrix{ElT,StoreT,IndsT} = DiagBlockSparseTensor{ElT,2,StoreT,IndsT} const DiagMatrix{ElT,StoreT,IndsT} = DiagTensor{ElT,2,StoreT,IndsT} @@ -68,7 +69,7 @@ function svd( # TODO: call this a function `diagonal`, i.e.: # https://github.com/JuliaLang/julia/issues/30250 # or make `diag(::Tensor)` return a view by default. - append!(d, data(Sb)) + append!(expose(d), data(Sb)) end # Square the singular values to get @@ -234,14 +235,14 @@ function LinearAlgebra.eigen( Db, Vb = eigen(expose(blockT)) Ds = [Db] Vs = [Vb] - append!(d, abs.(data(Db))) + append!(expose(d), abs.(data(Db))) for (n, b) in enumerate(eachnzblock(T)) n == 1 && continue blockT = blockview(T, b) Db, Vb = eigen(expose(blockT)) push!(Ds, Db) push!(Vs, Vb) - append!(d, abs.(data(Db))) + append!(expose(d), abs.(data(Db))) end dropblocks = Int[] diff --git a/NDTensors/src/dense/fill.jl b/NDTensors/src/dense/fill.jl deleted file mode 100644 index f1f9d0a824..0000000000 --- a/NDTensors/src/dense/fill.jl +++ /dev/null @@ -1,39 +0,0 @@ -##TODO replace randn in ITensors with generic_randn -## and replace zeros with generic_zeros - -# This is a file to write generic fills for NDTensors. -# This includes random fills, zeros, ... 
- -function generic_randn( - StoreT::Type{<:Dense{ElT,DataT}}, dim::Integer=0 -) where {DataT<:AbstractArray,ElT} - @assert ElT == eltype(DataT) - data = generic_randn(DataT, dim) - StoreT = set_datatype(StoreT, typeof(data)) - return StoreT(data) -end - -function generic_randn(StoreT::Type{<:Dense{ElT}}, dim::Integer=0) where {ElT} - return generic_randn(default_storagetype(ElT), dim) -end - -function generic_randn(StoreT::Type{<:Dense}, dim::Integer=0) - return generic_randn(default_storagetype(), dim) -end - -function generic_zeros( - StoreT::Type{<:Dense{ElT,DataT}}, dim::Integer=0 -) where {DataT<:AbstractArray,ElT} - @assert ElT == eltype(DataT) - data = generic_zeros(DataT, dim) - StoreT = set_datatype(StoreT, typeof(data)) - return StoreT(data) -end - -function generic_zeros(StoreT::Type{<:Dense{ElT}}, dim::Integer=0) where {ElT} - return generic_zeros(default_storagetype(ElT), dim) -end - -function generic_zeros(StoreT::Type{<:Dense}, dim::Integer=0) - return generic_zeros(default_storagetype(), dim) -end diff --git a/NDTensors/src/dense/generic_array_constructors.jl b/NDTensors/src/dense/generic_array_constructors.jl new file mode 100644 index 0000000000..41057bf1be --- /dev/null +++ b/NDTensors/src/dense/generic_array_constructors.jl @@ -0,0 +1,31 @@ +using .TypeParameterAccessors: + default_type_parameter, + parenttype, + set_eltype, + specify_default_type_parameters, + type_parameter +##TODO replace randn in ITensors with generic_randn +## and replace zeros with generic_zeros + +# This is a file to write generic fills for NDTensors. +# This includes random fills, zeros, ... + +function generic_randn(StoreT::Type{<:Dense}, dims::Integer; rng=Random.default_rng()) + StoreT = specify_default_type_parameters(StoreT) + DataT = specify_type_parameter(type_parameter(StoreT, parenttype), eltype, eltype(StoreT)) + @assert eltype(StoreT) == eltype(DataT) + + data = generic_randn(DataT, dims; rng=rng) + StoreT = set_datatype(StoreT, typeof(data)) + return StoreT(data) +end + +function generic_zeros(StoreT::Type{<:Dense}, dims::Integer) + StoreT = specify_default_type_parameters(StoreT) + DataT = specify_type_parameter(type_parameter(StoreT, parenttype), eltype, eltype(StoreT)) + @assert eltype(StoreT) == eltype(DataT) + + data = generic_zeros(DataT, dims) + StoreT = set_datatype(StoreT, typeof(data)) + return StoreT(data) +end diff --git a/NDTensors/src/imports.jl b/NDTensors/src/imports.jl index 21d3bcd5d5..98342d8b3b 100644 --- a/NDTensors/src/imports.jl +++ b/NDTensors/src/imports.jl @@ -28,11 +28,11 @@ for lib in [ :BaseExtensions, :UnspecifiedTypes, :TypeParameterAccessors, + :Expose, :GPUArraysCoreExtensions, :AMDGPUExtensions, :CUDAExtensions, :MetalExtensions, - :Expose, :BroadcastMapConversion, :RankFactorization, :Sectors, diff --git a/NDTensors/src/lib/CUDAExtensions/src/cuda.jl b/NDTensors/src/lib/CUDAExtensions/src/cuda.jl index 9fa41e2f5b..44b8007672 100644 --- a/NDTensors/src/lib/CUDAExtensions/src/cuda.jl +++ b/NDTensors/src/lib/CUDAExtensions/src/cuda.jl @@ -1,4 +1,4 @@ -using NDTensors.TypeParameterAccessors: TypeParameterAccessors +using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position using NDTensors.GPUArraysCoreExtensions: storagemode # Implemented in `ITensorGPU` and NDTensorsCUDAExt function cu end @@ -9,7 +9,6 @@ function cu end ## default_buffertype. 
Also `adapt(CuVector{<:Any, <:Any, Buffertype})` fails to work properly struct CuArrayAdaptor{B} end -## TODO remove TypeParameterAccessors when SetParameters is removed function TypeParameterAccessors.position(::Type{<:CuArrayAdaptor}, ::typeof(storagemode)) - return TypeParameterAccessors.Position(1) + return Position(1) end diff --git a/NDTensors/src/lib/Expose/src/Expose.jl b/NDTensors/src/lib/Expose/src/Expose.jl index e3d1c550c8..fb64927d7b 100644 --- a/NDTensors/src/lib/Expose/src/Expose.jl +++ b/NDTensors/src/lib/Expose/src/Expose.jl @@ -12,6 +12,7 @@ include("import.jl") ## using that type ## Exposed based functions include("functions/abstractarray.jl") +include("functions/append.jl") include("functions/copyto.jl") include("functions/linearalgebra.jl") include("functions/mul.jl") diff --git a/NDTensors/src/lib/Expose/src/functions/abstractarray.jl b/NDTensors/src/lib/Expose/src/functions/abstractarray.jl index 3562057844..ca6e573c56 100644 --- a/NDTensors/src/lib/Expose/src/functions/abstractarray.jl +++ b/NDTensors/src/lib/Expose/src/functions/abstractarray.jl @@ -1,13 +1,8 @@ -using NDTensors.GPUArraysCoreExtensions: GPUArraysCoreExtensions, cpu - parent(E::Exposed) = parent(unexpose(E)) transpose(E::Exposed) = transpose(unexpose(E)) adjoint(E::Exposed) = adjoint(unexpose(E)) - -GPUArraysCoreExtensions.cpu(E::Exposed) = cpu(unexpose(E)) - getindex(E::Exposed) = unexpose(E)[] function setindex!(E::Exposed, x::Number) diff --git a/NDTensors/src/lib/Expose/src/functions/append.jl b/NDTensors/src/lib/Expose/src/functions/append.jl new file mode 100644 index 0000000000..a72e895bed --- /dev/null +++ b/NDTensors/src/lib/Expose/src/functions/append.jl @@ -0,0 +1,3 @@ +function Base.append!(Ecollection::Exposed, collections...) + return append!(unexpose(Ecollection), collections...) +end diff --git a/NDTensors/src/lib/Expose/test/runtests.jl b/NDTensors/src/lib/Expose/test/runtests.jl index ff9b93e70c..a88039bdc6 100644 --- a/NDTensors/src/lib/Expose/test/runtests.jl +++ b/NDTensors/src/lib/Expose/test/runtests.jl @@ -17,6 +17,7 @@ using LinearAlgebra: using GPUArraysCore: @allowscalar include(joinpath(pkgdir(NDTensors), "test", "NDTensorsTestUtils", "NDTensorsTestUtils.jl")) using .NDTensorsTestUtils: devices_list +using NDTensors.GPUArraysCoreExtensions: cpu @testset "Testing Expose $dev, $elt" for dev in devices_list(ARGS), elt in (Float32, ComplexF32) @@ -235,5 +236,29 @@ using .NDTensorsTestUtils: devices_list zero(C) mul!!(C, B, A, true, false) @test cpu(C) ≈ Cp + + ################################## + ### Add test for append! 
to address scalar indexing in GPUs
+    ## For now, Metal doesn't have a `resize!` function, so all the tests are failing
+    if (dev == NDTensors.mtl)
+      continue
+    end
+    A = dev(randn(elt, 10))
+    Ap = copy(A)
+    B = randn(elt, 3)
+    C = append!(expose(A), B)
+
+    @test length(C) == 13
+    @test sum(C) ≈ sum(Ap) + sum(B)
+
+    A = Ap
+    B = dev(randn(elt, 29))
+    Bp = copy(B)
+    C = append!(expose(B), A)
+    @test length(C) == 39
+    @test sum(C) ≈ sum(Bp) + sum(Ap)
+    @allowscalar for i in 1:length(B)
+      @test C[i] == B[i]
+    end
   end
 end
diff --git a/NDTensors/src/lib/GPUArraysCoreExtensions/src/gpuarrayscore.jl b/NDTensors/src/lib/GPUArraysCoreExtensions/src/gpuarrayscore.jl
index efcff7bb51..5c7c858de1 100644
--- a/NDTensors/src/lib/GPUArraysCoreExtensions/src/gpuarrayscore.jl
+++ b/NDTensors/src/lib/GPUArraysCoreExtensions/src/gpuarrayscore.jl
@@ -1,3 +1,5 @@
+using GPUArraysCore: AbstractGPUArray, @allowscalar
+using NDTensors.Expose: Exposed, unexpose
 using NDTensors.TypeParameterAccessors:
   TypeParameterAccessors, type_parameter, set_type_parameter
@@ -13,3 +15,5 @@ function set_storagemode(type::Type, param)
 end

 function cpu end
+
+cpu(E::Exposed) = cpu(unexpose(E))
diff --git a/NDTensors/src/lib/MetalExtensions/src/metal.jl b/NDTensors/src/lib/MetalExtensions/src/metal.jl
index e28063ecad..1e2855c7c3 100644
--- a/NDTensors/src/lib/MetalExtensions/src/metal.jl
+++ b/NDTensors/src/lib/MetalExtensions/src/metal.jl
@@ -1,2 +1,15 @@
-# Implemented in `ITensorGPU` and NDTensorCUDA
+using NDTensors.TypeParameterAccessors: TypeParameterAccessors, Position
+using NDTensors.GPUArraysCoreExtensions: storagemode
+# Implemented in `ITensorGPU` and NDTensorsMetalExt
 function mtl end
+
+## Here we need an MtlArrayAdaptor because the MtlArrayAdaptor provided by Metal
+## converts 64 bit numbers to 32 bit. We cannot write `adapt(MtlArray, x)` because this
+## will not allow us to properly utilize the buffer preference without changing the value of
+## default_buffertype. Also `adapt(MtlArray{<:Any, <:Any, Buffertype})` fails to work properly
+
+struct MtlArrayAdaptor{B} end
+
+function TypeParameterAccessors.position(::Type{<:MtlArrayAdaptor}, ::typeof(storagemode))
+  return Position(1)
+end
diff --git a/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
index cc6dd00e2b..430890e005 100644
--- a/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
+++ b/NDTensors/test/ITensors/TestITensorDMRG/TestITensorDMRG.jl
@@ -15,13 +15,8 @@ reference_energies = Dict([
 is_broken(dev, elt::Type, conserve_qns::Val) = false
-## Disable blocksparse GPU testing on CUDA, Metal and ROC backends.
 ## Currently Metal fails because we are waiting for `resize!` to be added. Should be in the next metal release
-## CUDA fails because there is no defined `append!`.
-## ROC fails because TODO determine again why roc fails.
-is_broken(dev::typeof(cu), elt::Type, conserve_qns::Val{true}) = true is_broken(dev::typeof(mtl), elt::Type, conserve_qns::Val{true}) = true -is_broken(dev::typeof(roc), elt::Type, conserve_qns::Val{true}) = true include("dmrg.jl") diff --git a/NDTensors/test/runtests.jl b/NDTensors/test/runtests.jl index 66366562c1..2a05921e40 100644 --- a/NDTensors/test/runtests.jl +++ b/NDTensors/test/runtests.jl @@ -15,12 +15,6 @@ using SafeTestsets: @safetestset include(filename) end end - if "cuda" in ARGS || "all" in ARGS - include(joinpath(pkgdir(NDTensors), "ext", "examples", "NDTensorCUDA.jl")) - end - if "metal" in ARGS || "all" in ARGS - include(joinpath(pkgdir(NDTensors), "ext", "examples", "NDTensorMetal.jl")) - end end nothing diff --git a/Project.toml b/Project.toml index e5927dd8af..6acb6a706c 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "ITensors" uuid = "9136182c-28ba-11e9-034c-db9fb085ebd5" authors = ["Matthew Fishman ", "Miles Stoudenmire "] -version = "0.3.66" +version = "0.3.67" [deps] Adapt = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" @@ -52,7 +52,7 @@ IsApprox = "0.1" KrylovKit = "0.4.2, 0.5, 0.6, 0.7" LinearAlgebra = "1.6" LinearMaps = "3" -NDTensors = "0.2.30" +NDTensors = "0.3.0" PackageCompiler = "1.0.0, 2" PackageExtensionCompat = "1" Pkg = "1.6" diff --git a/src/imports.jl b/src/imports.jl index 87f089ea1b..287a1eee62 100644 --- a/src/imports.jl +++ b/src/imports.jl @@ -110,7 +110,7 @@ import LinearAlgebra: tr, transpose -using ITensors.NDTensors.Expose: cpu +using ITensors.NDTensors.GPUArraysCoreExtensions: cpu using ITensors.NDTensors: Algorithm,
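Taken together, the source changes above route GPU `append!` calls through `Expose`, move `cpu` to `GPUArraysCoreExtensions`, and give `cu`/`mtl` a `storagemode` keyword. A minimal sketch of the new CUDA code path, assuming CUDA.jl and a CUDA-capable GPU are available (the Metal path via `NDTensors.MetalExtensions.mtl` is analogous):

using CUDA                                  # loading CUDA.jl activates the NDTensorsCUDAExt extension
using NDTensors
using NDTensors.CUDAExtensions: cu
using NDTensors.Expose: expose

A = cu(randn(Float32, 10))                  # CuVector; the new `storagemode` keyword uses CUDA's default
B = randn(Float32, 3)                       # host vector to append
C = append!(expose(A), B)                   # dispatches to the extension's @allowscalar method
length(C) == 13                             # mirrors the Expose test added in this diff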