From 809a7e5b0a39b2ceeff5ee0d2ddf517d88169c82 Mon Sep 17 00:00:00 2001
From: Francesco Evangelista <francesco.evangelista@gmail.com>
Date: Sun, 26 Apr 2020 21:29:10 -0400
Subject: [PATCH 1/2] Add TensorPool and TempTensor classes

---
 include/ambit/tensor.h      |   4 +-
 include/ambit/tensor_pool.h | 321 ++++++++++++++++++++++++++++++++++++
 src/CMakeLists.txt          |   1 +
 src/tensor/core/core.cc     |  15 ++
 src/tensor/core/core.h      |   2 +
 src/tensor/tensor.cc        |   2 +
 src/tensor/tensor_pool.cc   | 102 ++++++++++++
 src/tensor/tensorimpl.h     |  10 ++
 8 files changed, 456 insertions(+), 1 deletion(-)
 create mode 100644 include/ambit/tensor_pool.h
 create mode 100644 src/tensor/tensor_pool.cc
diff --git a/include/ambit/tensor.h b/include/ambit/tensor.h
index cbe0c15..b73b96d 100644
--- a/include/ambit/tensor.h
+++ b/include/ambit/tensor.h
@@ -214,7 +214,7 @@ class Tensor
      *
      * Example successful use case:
      *  Tensor A = Tensor::build(CoreTensor, "A3", {4,5,6});
-     *  A.at({0,0,0}) = 1.0; // Fill element 
+     *  A.at({0,0,0}) = 1.0; // Fill element
      *
      * Results:
      *  @return reference to the element, if tensor object supports it
@@ -517,6 +517,8 @@ class Tensor
   public:
     void reshape(const Dimension &dims);
 
+    void resize(const Dimension &dims);
+
     // => Operator Overloading API <= //
 
     LabeledTensor operator()(const string &indices) const;
diff --git a/include/ambit/tensor_pool.h b/include/ambit/tensor_pool.h
new file mode 100644
index 0000000..4acf96c
--- /dev/null
+++ b/include/ambit/tensor_pool.h
@@ -0,0 +1,321 @@
+/*
+ * @BEGIN LICENSE
+ *
+ * ambit: C++ library for the implementation of tensor product calculations
+ *        through a clean, concise user interface.
+ *
+ * Copyright (c) 2014-2017 Ambit developers.
+ *
+ * The copyrights for code used from other parties are included in
+ * the corresponding files.
+ *
+ * This file is part of ambit.
+ *
+ * Ambit is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * Ambit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with ambit; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @END LICENSE
+ */
+
+#if !defined(TENSOR_POOL_H)
+#define TENSOR_POOL_H
+
+#include <vector>
+
+#include <ambit/tensor.h>
+
+#if defined(__clang__)
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wunused-parameter"
+#endif
+
+namespace ambit
+{
+
+class TempTensor;
+
+class TensorPool
+{
+  public:
+    // => Constructors <= //
+
+    /**
+     * Returns a temporary Tensor of TensorType type
+     * with given name and dimensions dims
+     *
+     * Parameters:
+     *  @param type the TensorType
+     *  @param name the name of the Tensor
+     *  @param dims the dimensions of the indices of the tensor
+     *    (dims.size() is the tensor rank)
+     *
+     * Results:
+     *  @return a Tensor of TensorType type with name and dims
+     *   The returned Tensor is set to zero.
+     **/
+    TempTensor get_tensor(TensorType type, const string &name,
+                          const Dimension &dims);
+
+    /**
+     * Release a temporary Tensor
+     *
+     * Parameters:
+     *  @param id the id of the Tensor
+     *
+     * Results:
+     *  release a temporary tensor and make it available for a new call from
+     *  get_tensor
+     **/
+    void release_tensor(size_t id);
+
+    /**
+     * Frees the TensorPool's internal memory allocation.
+     */
+    void reset();
+
+  private:
+    std::vector<std::pair<bool, Tensor>> tensor_pool_;
+};
+
+class TempTensor
+{
+  public:
+    // => Constructor <= //
+    TempTensor(size_t id, Tensor t, TensorPool *tp) : id_(id), t_(t), tp_(tp) {}
+    ~TempTensor() { tp_->release_tensor(id_); }
+    Tensor tensor() { return t_; }
+
+  private:
+    size_t id_;
+    Tensor t_;
+    TensorPool *tp_;
+};
+
+//    virtual TensorImplPtr clone(TensorType type = CurrentTensor) const;
+
+//    // => Reflectors <= //
+
+//    TensorType type() const { return type_; }
+//    std::string name() const { return name_; }
+//    const Dimension &dims() const { return dims_; }
+//    const Dimension &addressing() const { return addressing_; }
+//    size_t dim(size_t ind) const { return dims_[ind]; }
+//    size_t rank() const { return dims_.size(); }
+//    size_t numel() const { return numel_; }
+
+//    void set_name(const std::string &name) { name_ = name; }
+
+//    void print(FILE *fh = stdout, bool level = true,
+//               const std::string &format = "%12.7f", int maxcols = 5) const;
+
+//    // => Setters/Getters <= //
+
+//    virtual std::vector<double> &data()
+//    {
+//        throw std::runtime_error(
+//            "TensorImpl::data() not supported for tensor type " +
+//            std::to_string(type()));
+//    }
+//    virtual const std::vector<double> &data() const
+//    {
+//        throw std::runtime_error(
+//            "TensorImpl::data() not supported for tensor type " +
+//            std::to_string(type()));
+//    }
+
+//    virtual double &at(const std::vector<size_t> &indices)
+//    {
+//        throw std::runtime_error(
+//            "TensorImpl::at() not supported for tensor type " +
+//            std::to_string(type()));
+//    }
+
+//    virtual const double &at(const std::vector<size_t> &indices) const
+//    {
+//        throw std::runtime_error(
+//            "TensorImpl::at() not supported for tensor type " +
+//            std::to_string(type()));
+//    }
+
+//    // => Simple Single Tensor Operations <= //
+
+//    virtual double norm(int type = 2) const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual std::tuple<double, std::vector<size_t>> max() const
+//    {
+//        throw std::runtime_error(
+//            "Operation not support in this tensor implementation.");
+//    }
+
+//    virtual std::tuple<double, std::vector<size_t>> min() const
+//    {
+//        throw std::runtime_error(
+//            "Operation not support in this tensor implementation.");
+//    }
+
+//    void zero();
+
+//    virtual void scale(double beta = 0.0)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual void set(double alpha)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    void copy(ConstTensorImplPtr other);
+
+//    virtual void slice(ConstTensorImplPtr A, const IndexRange &Cinds,
+//                       const IndexRange &Ainds, double alpha = 1.0,
+//                       double beta = 0.0);
+
+//    virtual void permute(ConstTensorImplPtr A, const Indices &Cinds,
+//                         const Indices &Ainds, double alpha = 1.0,
+//                         double beta = 0.0)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual void contract(ConstTensorImplPtr A, ConstTensorImplPtr B,
+//                          const Indices &Cinds, const Indices &Ainds,
+//                          const Indices &Binds, double alpha = 1.0,
+//                          double beta = 0.0)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual void contract(ConstTensorImplPtr A, ConstTensorImplPtr B,
+//                          const Indices &Cinds, const Indices &Ainds,
+//                          const Indices &Binds, std::shared_ptr<TensorImpl>
+//                          &A2, std::shared_ptr<TensorImpl> &B2,
+//                          std::shared_ptr<TensorImpl> &C2, double alpha = 1.0,
+//                          double beta = 0.0)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual void gemm(ConstTensorImplPtr A, ConstTensorImplPtr B, bool transA,
+//                      bool transB, size_t nrow, size_t ncol, size_t nzip,
+//                      size_t ldaA, size_t ldaB, size_t ldaC, size_t offA = 0L,
+//                      size_t offB = 0L, size_t offC = 0L, double alpha = 1.0,
+//                      double beta = 0.0)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    // => Rank-2 Operations <= //
+
+//    virtual std::map<std::string, TensorImplPtr>
+//    syev(EigenvalueOrder order) const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+//    virtual std::map<std::string, TensorImplPtr>
+//    geev(EigenvalueOrder order) const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    virtual std::map<std::string, TensorImplPtr> gesvd() const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+//    // virtual TensorImplPtr cholesky() const = 0;
+//    // virtual std::map<std::string, TensorImplPtr> lu() const = 0;
+//    // virtual std::map<std::string, TensorImplPtr> qr() const = 0;
+
+//    // virtual TensorImplPtr cholesky_inverse() const = 0;
+//    virtual TensorImplPtr inverse() const
+//    {
+//        throw std::runtime_error(
+//            "Operation not support in this tensor implementation.");
+//    }
+//    virtual TensorImplPtr power(double power, double condition = 1.0E-12)
+//    const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    // => Iterators <= //
+
+//    virtual void iterate(
+//        const std::function<void(const std::vector<size_t> &, double &)>
+//        &func)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+//    virtual void citerate(const std::function<void(const std::vector<size_t>
+//    &,
+//                                                   const double &)> &func)
+//                                                   const
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//    void reshape(const Dimension &dims)
+//    {
+//        dims_ = dims;
+//        addressing_ = Dimension(dims_.size(), 1);
+//        for (int n = static_cast<int>(dims_.size()) - 2; n >= 0; --n)
+//        {
+//            addressing_[n] = addressing_[n + 1] * dims_[n];
+//        }
+//    }
+
+//    void resize(const Dimension &dims)
+//    {
+//        throw std::runtime_error(
+//            "Operation not supported in this tensor implementation.");
+//    }
+
+//  protected:
+//    static bool typeCheck(TensorType type, ConstTensorImplPtr A,
+//                          bool throwIfDiff = true);
+//    static bool rankCheck(size_t rank, ConstTensorImplPtr A,
+//                          bool throwIfDiff = true);
+//    static bool squareCheck(ConstTensorImplPtr A, bool throwIfDiff = true);
+//    static bool dimensionCheck(ConstTensorImplPtr A, ConstTensorImplPtr B,
+//                               bool throwIfDiff = true);
+
+//  private:
+//    TensorType type_;
+//    std::string name_;
+//    Dimension dims_;
+//    /// The tensor strides
+//    Dimension addressing_;
+//    size_t numel_;
+} // namespace ambit
+
+#if defined(__clang__)
+#pragma clang diagnostic pop
+#endif
+
+#endif
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 4693206..6e426c2 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -58,6 +58,7 @@ set(TENSOR_SOURCES
         tensor/slice.cc
         tensor/sliced_tensor.cc
         tensor/tensor.cc
+        tensor/tensor_pool.cc
         tensor/tensorimpl.cc
         tensor/timer.cc
 
diff --git a/src/tensor/core/core.cc b/src/tensor/core/core.cc
index 6bcc763..c9e558c 100644
--- a/src/tensor/core/core.cc
+++ b/src/tensor/core/core.cc
@@ -55,6 +55,21 @@ void CoreTensorImpl::reshape(const Dimension &dims)
     TensorImpl::reshape(dims);
 }
 
+void CoreTensorImpl::resize(const Dimension &dims, bool trim)
+{
+    TensorImpl::reshape(dims);
+    // Requests that the vector capacity be at least enough to contain numel
+    // elements. If numel is greater than the current vector capacity, the
+    // function causes the container to reallocate its storage increasing its
+    // capacity to numel (or greater).
+
+    if (numel() > data_.size())
+    {
+        // TODO: implement trimming
+        data_.reserve(numel());
+    }
+}
+
 double CoreTensorImpl::norm(int type) const
 {
     double val = 0.0;
diff --git a/src/tensor/core/core.h b/src/tensor/core/core.h
index 11830d4..8d314b4 100644
--- a/src/tensor/core/core.h
+++ b/src/tensor/core/core.h
@@ -45,6 +45,8 @@ class CoreTensorImpl : public TensorImpl
     // the strides in the slice codes.
     void reshape(const Dimension &dims);
 
+    void resize(const Dimension &dims, bool trim = true);
+
     vector<double> &data() { return data_; }
     const vector<double> &data() const { return data_; }
 
diff --git a/src/tensor/tensor.cc b/src/tensor/tensor.cc
index cb68f3b..f307ead 100644
--- a/src/tensor/tensor.cc
+++ b/src/tensor/tensor.cc
@@ -206,6 +206,8 @@ Tensor Tensor::clone(TensorType type) const
 
 void Tensor::reshape(const Dimension &dims) { tensor_->reshape(dims); }
 
+void Tensor::resize(const Dimension &dims) { tensor_->resize(dims); }
+
 void Tensor::copy(const Tensor &other) { tensor_->copy(other.tensor_.get()); }
 
 Tensor::Tensor() {}
diff --git a/src/tensor/tensor_pool.cc b/src/tensor/tensor_pool.cc
new file mode 100644
index 0000000..008bb05
--- /dev/null
+++ b/src/tensor/tensor_pool.cc
@@ -0,0 +1,102 @@
+/*
+ * @BEGIN LICENSE
+ *
+ * ambit: C++ library for the implementation of tensor product calculations
+ *        through a clean, concise user interface.
+ *
+ * Copyright (c) 2014-2017 Ambit developers.
+ *
+ * The copyrights for code used from other parties are included in
+ * the corresponding files.
+ *
+ * This file is part of ambit.
+ *
+ * Ambit is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as published by
+ * the Free Software Foundation, version 3.
+ *
+ * Ambit is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with ambit; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * @END LICENSE
+ */
+
+//#include <stdexcept>
+//#include <string>
+//#include <vector>
+
+//#include "macros.h"
+
+#include <mutex> // std::mutex
+#include <numeric>
+
+#include <ambit/tensor_pool.h>
+
+//#if defined(__clang__)
+//#pragma clang diagnostic push
+//#pragma clang diagnostic ignored "-Wunused-parameter"
+//#endif
+
+namespace ambit
+{
+
+std::mutex pool_mutex; // mutex for critical section
+
+TempTensor TensorPool::get_tensor(TensorType type, const string &name,
+                                  const Dimension &dims)
+{
+    // critical section
+    pool_mutex.lock();
+
+    size_t target_numel =
+        std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1),
+                        std::multiplies<size_t>());
+    Tensor t;
+    size_t id;
+
+    // Find a free tensor of the appropriate size
+    bool found = false;
+    for (size_t n = 0; n < tensor_pool_.size(); n++)
+    {
+        auto &p = tensor_pool_[n];
+        // check if the busy flag is false
+        if (p.first == false)
+        {
+            // lock the tensor
+            p.first = true;
+            // resize the tensor
+            p.second.resize(dims);
+            // grab the Tensor object and set the id
+            t = p.second;
+            id = n;
+            found = true;
+            break;
+        }
+    }
+    // If there is no tensor, create one
+    if (not found)
+    {
+        t = Tensor::build(type, name, dims);
+        id = tensor_pool_.size();
+        tensor_pool_.push_back(std::make_pair(true, t));
+    }
+    pool_mutex.unlock();
+    return TempTensor(id, t, this);
+}
+
+void TensorPool::release_tensor(size_t id)
+{
+    // critical section
+    pool_mutex.lock();
+    tensor_pool_[id].first = false;
+    pool_mutex.unlock();
+}
+
+void TensorPool::reset() { tensor_pool_.clear(); }
+} // namespace ambit
diff --git a/src/tensor/tensorimpl.h b/src/tensor/tensorimpl.h
index d6ba0c6..9d9226f 100644
--- a/src/tensor/tensorimpl.h
+++ b/src/tensor/tensorimpl.h
@@ -30,6 +30,7 @@
 #if !defined(TENSOR_TENSORIMPL_H)
 #define TENSOR_TENSORIMPL_H
 
+#include <numeric>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -254,6 +255,9 @@ class TensorImpl
     void reshape(const Dimension &dims)
     {
         dims_ = dims;
+        numel_ =
+            std::accumulate(dims_.begin(), dims_.end(), static_cast<size_t>(1),
+                            std::multiplies<size_t>());
         addressing_ = Dimension(dims_.size(), 1);
         for (int n = static_cast<int>(dims_.size()) - 2; n >= 0; --n)
         {
@@ -261,6 +265,12 @@ class TensorImpl
         }
     }
 
+    void resize(const Dimension &dims, bool trim = true)
+    {
+        throw std::runtime_error(
+            "Operation not supported in this tensor implementation.");
+    }
+
   protected:
     static bool typeCheck(TensorType type, ConstTensorImplPtr A,
                           bool throwIfDiff = true);

From 6ca6eebc1552f3c25b48ec4a1b521d8ce41e02ee Mon Sep 17 00:00:00 2001
From: Francesco Evangelista <francesco.evangelista@gmail.com>
Date: Thu, 30 Apr 2020 12:58:01 -0400
Subject: [PATCH 2/2] Use TempTensor in contractions

---
 include/ambit/tensor_pool.h  | 233 +----------------------
 src/tensor/labeled_tensor.cc | 349 ++++++++++++++++++++++-------------
 src/tensor/tensor.cc         |   3 +
 src/tensor/tensor_pool.cc    |  56 +++---
 src/tensor/tensorimpl.h      |   2 +-
 src/tensor/timer.cc          |   6 +-
 6 files changed, 276 insertions(+), 373 deletions(-)

diff --git a/include/ambit/tensor_pool.h b/include/ambit/tensor_pool.h
index 4acf96c..aec789f 100644
--- a/include/ambit/tensor_pool.h
+++ b/include/ambit/tensor_pool.h
@@ -50,21 +50,13 @@ class TensorPool
     // => Constructors <= //
 
     /**
-     * Returns a temporary Tensor of TensorType type
-     * with given name and dimensions dims
-     *
-     * Parameters:
-     *  @param type the TensorType
-     *  @param name the name of the Tensor
-     *  @param dims the dimensions of the indices of the tensor
-     *    (dims.size() is the tensor rank)
+     * Returns a temporary Core tensor. This function is thread safe.
      *
      * Results:
-     *  @return a Tensor of TensorType type with name and dims
-     *   The returned Tensor is set to zero.
+     *  @return a TempTensor object that will automatically make the
+     *  tensor available to the pool upon destruction.
      **/
-    TempTensor get_tensor(TensorType type, const string &name,
-                          const Dimension &dims);
+    TempTensor get_tensor();
 
     /**
      * Release a temporary Tensor
@@ -101,217 +93,12 @@ class TempTensor
     TensorPool *tp_;
 };
 
-//    virtual TensorImplPtr clone(TensorType type = CurrentTensor) const;
-
-//    // => Reflectors <= //
-
-//    TensorType type() const { return type_; }
-//    std::string name() const { return name_; }
-//    const Dimension &dims() const { return dims_; }
-//    const Dimension &addressing() const { return addressing_; }
-//    size_t dim(size_t ind) const { return dims_[ind]; }
-//    size_t rank() const { return dims_.size(); }
-//    size_t numel() const { return numel_; }
-
-//    void set_name(const std::string &name) { name_ = name; }
-
-//    void print(FILE *fh = stdout, bool level = true,
-//               const std::string &format = "%12.7f", int maxcols = 5) const;
-
-//    // => Setters/Getters <= //
-
-//    virtual std::vector<double> &data()
-//    {
-//        throw std::runtime_error(
-//            "TensorImpl::data() not supported for tensor type " +
-//            std::to_string(type()));
-//    }
-//    virtual const std::vector<double> &data() const
-//    {
-//        throw std::runtime_error(
-//            "TensorImpl::data() not supported for tensor type " +
-//            std::to_string(type()));
-//    }
-
-//    virtual double &at(const std::vector<size_t> &indices)
-//    {
-//        throw std::runtime_error(
-//            "TensorImpl::at() not supported for tensor type " +
-//            std::to_string(type()));
-//    }
-
-//    virtual const double &at(const std::vector<size_t> &indices) const
-//    {
-//        throw std::runtime_error(
-//            "TensorImpl::at() not supported for tensor type " +
-//            std::to_string(type()));
-//    }
-
-//    // => Simple Single Tensor Operations <= //
-
-//    virtual double norm(int type = 2) const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual std::tuple<double, std::vector<size_t>> max() const
-//    {
-//        throw std::runtime_error(
-//            "Operation not support in this tensor implementation.");
-//    }
-
-//    virtual std::tuple<double, std::vector<size_t>> min() const
-//    {
-//        throw std::runtime_error(
-//            "Operation not support in this tensor implementation.");
-//    }
-
-//    void zero();
-
-//    virtual void scale(double beta = 0.0)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual void set(double alpha)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    void copy(ConstTensorImplPtr other);
-
-//    virtual void slice(ConstTensorImplPtr A, const IndexRange &Cinds,
-//                       const IndexRange &Ainds, double alpha = 1.0,
-//                       double beta = 0.0);
-
-//    virtual void permute(ConstTensorImplPtr A, const Indices &Cinds,
-//                         const Indices &Ainds, double alpha = 1.0,
-//                         double beta = 0.0)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual void contract(ConstTensorImplPtr A, ConstTensorImplPtr B,
-//                          const Indices &Cinds, const Indices &Ainds,
-//                          const Indices &Binds, double alpha = 1.0,
-//                          double beta = 0.0)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual void contract(ConstTensorImplPtr A, ConstTensorImplPtr B,
-//                          const Indices &Cinds, const Indices &Ainds,
-//                          const Indices &Binds, std::shared_ptr<TensorImpl>
-//                          &A2, std::shared_ptr<TensorImpl> &B2,
-//                          std::shared_ptr<TensorImpl> &C2, double alpha = 1.0,
-//                          double beta = 0.0)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual void gemm(ConstTensorImplPtr A, ConstTensorImplPtr B, bool transA,
-//                      bool transB, size_t nrow, size_t ncol, size_t nzip,
-//                      size_t ldaA, size_t ldaB, size_t ldaC, size_t offA = 0L,
-//                      size_t offB = 0L, size_t offC = 0L, double alpha = 1.0,
-//                      double beta = 0.0)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    // => Rank-2 Operations <= //
-
-//    virtual std::map<std::string, TensorImplPtr>
-//    syev(EigenvalueOrder order) const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-//    virtual std::map<std::string, TensorImplPtr>
-//    geev(EigenvalueOrder order) const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    virtual std::map<std::string, TensorImplPtr> gesvd() const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-//    // virtual TensorImplPtr cholesky() const = 0;
-//    // virtual std::map<std::string, TensorImplPtr> lu() const = 0;
-//    // virtual std::map<std::string, TensorImplPtr> qr() const = 0;
-
-//    // virtual TensorImplPtr cholesky_inverse() const = 0;
-//    virtual TensorImplPtr inverse() const
-//    {
-//        throw std::runtime_error(
-//            "Operation not support in this tensor implementation.");
-//    }
-//    virtual TensorImplPtr power(double power, double condition = 1.0E-12)
-//    const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    // => Iterators <= //
-
-//    virtual void iterate(
-//        const std::function<void(const std::vector<size_t> &, double &)>
-//        &func)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-//    virtual void citerate(const std::function<void(const std::vector<size_t>
-//    &,
-//                                                   const double &)> &func)
-//                                                   const
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//    void reshape(const Dimension &dims)
-//    {
-//        dims_ = dims;
-//        addressing_ = Dimension(dims_.size(), 1);
-//        for (int n = static_cast<int>(dims_.size()) - 2; n >= 0; --n)
-//        {
-//            addressing_[n] = addressing_[n + 1] * dims_[n];
-//        }
-//    }
-
-//    void resize(const Dimension &dims)
-//    {
-//        throw std::runtime_error(
-//            "Operation not supported in this tensor implementation.");
-//    }
-
-//  protected:
-//    static bool typeCheck(TensorType type, ConstTensorImplPtr A,
-//                          bool throwIfDiff = true);
-//    static bool rankCheck(size_t rank, ConstTensorImplPtr A,
-//                          bool throwIfDiff = true);
-//    static bool squareCheck(ConstTensorImplPtr A, bool throwIfDiff = true);
-//    static bool dimensionCheck(ConstTensorImplPtr A, ConstTensorImplPtr B,
-//                               bool throwIfDiff = true);
-
-//  private:
-//    TensorType type_;
-//    std::string name_;
-//    Dimension dims_;
-//    /// The tensor strides
-//    Dimension addressing_;
-//    size_t numel_;
+namespace tensor_pool
+{
+void initialize();
+void finalize();
+TempTensor get_tensor();
+} // namespace tensor_pool
 } // namespace ambit
 
 #if defined(__clang__)
diff --git a/src/tensor/labeled_tensor.cc b/src/tensor/labeled_tensor.cc
index d8b5ca7..1f667bf 100644
--- a/src/tensor/labeled_tensor.cc
+++ b/src/tensor/labeled_tensor.cc
@@ -20,20 +20,21 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  * GNU Lesser General Public License for more details.
  *
- * You should have received a copy of the GNU Lesser General Public License along
- * with ambit; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with ambit; if not, write to the Free Software Foundation, Inc., 51
+ * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  *
  * @END LICENSE
  */
 
-#include <list>
+#include "indices.h"
+#include "tensorimpl.h"
 #include <algorithm>
-#include <numeric>
 #include <ambit/tensor.h>
-#include "tensorimpl.h"
-#include "indices.h"
+#include <ambit/tensor_pool.h>
 #include <cstring>
+#include <list>
+#include <numeric>
 
 namespace ambit
 {
@@ -135,7 +136,7 @@ LabeledTensor tensor_product_get_temp_AB(const LabeledTensor &A,
         Tensor::build(A.T().type(), A.T().name() + " * " + B.T().name(), dims);
     return T(indices::to_string(indices));
 }
-}
+} // namespace
 
 void LabeledTensor::contract(const LabeledTensorContraction &rhs,
                              bool zero_result, bool add, bool optimize_order)
@@ -146,7 +147,8 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs,
     std::iota(perm.begin(), perm.end(), 0);
     std::pair<double, double> best_cpu_memory_cost(1.0e200, 1.0e200);
 
-    if (optimize_order) {
+    if (optimize_order)
+    {
         do
         {
             std::pair<double, double> cpu_memory_cost =
@@ -159,10 +161,15 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs,
         } while (std::next_permutation(perm.begin(), perm.end()));
         // at this point 'best_perm' should be used to perform contraction in
         // optimal order.
-    } else {
+    }
+    else
+    {
         best_perm = perm;
     }
 
+    TempTensor tempTensor = tensor_pool::get_tensor();
+    Tensor tAB = tempTensor.tensor();
+
     const LabeledTensor &Aref = rhs[best_perm[0]];
     LabeledTensor A(Aref.T_, Aref.indices_, Aref.factor_);
     int maxn = int(nterms) - 2;
@@ -200,8 +207,7 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs,
             indices.push_back(B_fix_idx[i]);
         }
 
-        Tensor tAB = Tensor::build(A.T().type(),
-                                   A.T().name() + " * " + B.T().name(), dims);
+        tAB.resize(dims);
 
         tAB.contract(A.T(), B.T(), indices, A.indices(), B.indices(),
                      A.factor() * B.factor(), 0.0);
@@ -479,12 +485,17 @@ LabeledTensorDistribution::operator double() const
     return C.data()[0];
 }
 
-LabeledTensorBatchedContraction batched(const string &batched_indices, const LabeledTensorContraction &contraction) {
-    return LabeledTensorBatchedContraction(contraction, indices::split(batched_indices));
+LabeledTensorBatchedContraction
+batched(const string &batched_indices,
+        const LabeledTensorContraction &contraction)
+{
+    return LabeledTensorBatchedContraction(contraction,
+                                           indices::split(batched_indices));
 }
 
-void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_batched,
-                             bool zero_result, bool add, bool optimize_order)
+void LabeledTensor::contract_batched(
+    const LabeledTensorBatchedContraction &rhs_batched, bool zero_result,
+    bool add, bool optimize_order)
 {
     const LabeledTensorContraction &rhs = rhs_batched.get_contraction();
     const Indices &batched_indices = rhs_batched.get_batched_indices();
@@ -492,19 +503,28 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
     // Find the indices to be batched in result labeled tensor.
     size_t batched_size = batched_indices.size();
     std::vector<size_t> slicing_axis(batched_size);
-    for (size_t l = 0; l < batched_size; ++l) {
-        auto it = std::find(indices_.begin(), indices_.end(), batched_indices[l]);
-        if (it != indices_.end()) {
+    for (size_t l = 0; l < batched_size; ++l)
+    {
+        auto it =
+            std::find(indices_.begin(), indices_.end(), batched_indices[l]);
+        if (it != indices_.end())
+        {
             slicing_axis[l] = std::distance(indices_.begin(), it);
-        } else {
-            throw std::runtime_error("Slicing indices do not exist in tensor indices.");
+        }
+        else
+        {
+            throw std::runtime_error(
+                "Slicing indices do not exist in tensor indices.");
         }
     }
 
-    // Determine if the result need permutation to permute the batched indices to the front
+    // Determine if the result need permutation to permute the batched indices
+    // to the front
     bool permute_flag = false;
-    for (size_t l = 0; l < batched_size; ++l) {
-        if (slicing_axis[l] != l) {
+    for (size_t l = 0; l < batched_size; ++l)
+    {
+        if (slicing_axis[l] != l)
+        {
             permute_flag = true;
             break;
         }
@@ -513,39 +533,47 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
     // Determine batched dimensions.
     Indices permuted_indices;
     Dimension slicing_dims;
-    for (const string& s : batched_indices) {
+    for (const string &s : batched_indices)
+    {
         slicing_dims.push_back(dim_by_index(s));
     }
 
     // Permute result labeled tensor.
     Tensor Ltp;
-    if (permute_flag) {
+    if (permute_flag)
+    {
         permuted_indices = batched_indices;
-        for (size_t l = 0, l_max = numdim(); l < l_max; ++l) {
-            if (std::find(slicing_axis.begin(), slicing_axis.end(),l) == slicing_axis.end()) {
+        for (size_t l = 0, l_max = numdim(); l < l_max; ++l)
+        {
+            if (std::find(slicing_axis.begin(), slicing_axis.end(), l) ==
+                slicing_axis.end())
+            {
                 permuted_indices.push_back(indices_[l]);
             }
         }
         Dimension dims;
-        for (const string& s : permuted_indices) {
+        for (const string &s : permuted_indices)
+        {
             dims.push_back(dim_by_index(s));
         }
         Ltp = Tensor::build(T().type(), T().name() + " permute", dims);
         Ltp.permute(T_, permuted_indices, indices_);
-    } else {
+    }
+    else
+    {
         Ltp = T().clone();
         permuted_indices = indices_;
     }
     LabeledTensor Lt(Ltp, permuted_indices);
 
-
     size_t nterms = rhs.size();
     std::vector<size_t> perm(nterms);
     std::vector<size_t> best_perm(nterms);
     std::iota(perm.begin(), perm.end(), 0);
     std::pair<double, double> best_cpu_memory_cost(1.0e200, 1.0e200);
 
-    if (optimize_order) {
+    if (optimize_order)
+    {
         do
         {
             std::pair<double, double> cpu_memory_cost =
@@ -558,45 +586,66 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
         } while (std::next_permutation(perm.begin(), perm.end()));
         // at this point 'best_perm' should be used to perform contraction in
         // optimal order.
-    } else {
+    }
+    else
+    {
         best_perm = perm;
     }
 
-    std::vector<std::vector<bool>> need_slicing(nterms, std::vector<bool>(batched_size + 1));
+    std::vector<std::vector<bool>> need_slicing(
+        nterms, std::vector<bool>(batched_size + 1));
 
     // Permute tensor indices if the corresponding tensor needs to be batched.
     LabeledTensorContraction rhsp;
-    for (size_t i = 0; i < nterms; ++i) {
-        const LabeledTensor& A = rhs[best_perm[i]];
-        const Indices& A_indices = A.indices();
+    for (size_t i = 0; i < nterms; ++i)
+    {
+        const LabeledTensor &A = rhs[best_perm[i]];
+        const Indices &A_indices = A.indices();
         Indices gemm_indices;
-        for (const string& s : A_indices) {
-            auto it = std::find(batched_indices.begin(), batched_indices.end(), s);
-            if (it != batched_indices.end()) {
-                need_slicing[i][std::distance(batched_indices.begin(), it)] = true;
-            } else {
+        for (const string &s : A_indices)
+        {
+            auto it =
+                std::find(batched_indices.begin(), batched_indices.end(), s);
+            if (it != batched_indices.end())
+            {
+                need_slicing[i][std::distance(batched_indices.begin(), it)] =
+                    true;
+            }
+            else
+            {
                 gemm_indices.push_back(s);
             }
         }
         Indices permuted_indices;
-        for (size_t l = 0; l < batched_size; ++l) {
-            if (need_slicing[i][l]) {
+        for (size_t l = 0; l < batched_size; ++l)
+        {
+            if (need_slicing[i][l])
+            {
                 permuted_indices.push_back(batched_indices[l]);
                 need_slicing[i][batched_size] = true;
             }
         }
-        if (permuted_indices.size() == 0) {
+        if (permuted_indices.size() == 0)
+        {
             rhsp.operator*(A);
-        } else {
-            permuted_indices.insert(permuted_indices.end(),gemm_indices.begin(),gemm_indices.end());
-            if (permuted_indices == A_indices) {
+        }
+        else
+        {
+            permuted_indices.insert(permuted_indices.end(),
+                                    gemm_indices.begin(), gemm_indices.end());
+            if (permuted_indices == A_indices)
+            {
                 rhsp.operator*(A);
-            } else {
+            }
+            else
+            {
                 Dimension dims;
-                for (const string& s : permuted_indices) {
+                for (const string &s : permuted_indices)
+                {
                     dims.push_back(A.dim_by_index(s));
                 }
-                Tensor Atp = Tensor::build(A.T().type(), A.T().name() + " permute", dims);
+                Tensor Atp = Tensor::build(A.T().type(),
+                                           A.T().name() + " permute", dims);
                 Atp.permute(A.T(), permuted_indices, A.indices());
                 LabeledTensor At(Atp, permuted_indices, A.factor());
                 rhsp.operator*(At);
@@ -605,103 +654,132 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
     }
 
     // Create intermediate batch tensor for result tensor.
-    const Indices& Lt_indices = Lt.indices();
-    const Dimension& Lt_dims = Lt.T().dims();
+    const Indices &Lt_indices = Lt.indices();
+    const Dimension &Lt_dims = Lt.T().dims();
     Dimension sub_dims;
     Indices sub_indices;
-    sub_dims.insert(sub_dims.end(), Lt_dims.begin()+batched_size, Lt_dims.end());
-    sub_indices.insert(sub_indices.end(), Lt_indices.begin()+batched_size, Lt_indices.end());
-    Tensor Ltp_batch = Tensor::build(Lt.T().type(), Lt.T().name() + " batch", sub_dims);
+    sub_dims.insert(sub_dims.end(), Lt_dims.begin() + batched_size,
+                    Lt_dims.end());
+    sub_indices.insert(sub_indices.end(), Lt_indices.begin() + batched_size,
+                       Lt_indices.end());
+    Tensor Ltp_batch =
+        Tensor::build(Lt.T().type(), Lt.T().name() + " batch", sub_dims);
     LabeledTensor Lt_batch(Ltp_batch, sub_indices);
 
     // Create intermediate batch tensors for tensors to be contracted.
     LabeledTensorContraction rhs_batch;
     std::vector<Tensor> batch_tensors;
-    for (size_t i = 0; i < nterms; ++i) {
-        const LabeledTensor& A = rhsp[i];
-        if (need_slicing[i][batched_size]) {
-            const Indices& A_indices = A.indices();
-            const Dimension& A_dims = A.T().dims();
+    for (size_t i = 0; i < nterms; ++i)
+    {
+        const LabeledTensor &A = rhsp[i];
+        if (need_slicing[i][batched_size])
+        {
+            const Indices &A_indices = A.indices();
+            const Dimension &A_dims = A.T().dims();
             Dimension dims;
             Indices indices;
             size_t count = 0;
-            for (size_t l = 0; l < batched_size; ++l) {
-                if (need_slicing[i][l]) {
+            for (size_t l = 0; l < batched_size; ++l)
+            {
+                if (need_slicing[i][l])
+                {
                     count++;
                 }
             }
-            indices.insert(indices.end(), A_indices.begin()+count, A_indices.end());
-            dims.insert(dims.end(), A_dims.begin()+count, A_dims.end());
+            indices.insert(indices.end(), A_indices.begin() + count,
+                           A_indices.end());
+            dims.insert(dims.end(), A_dims.begin() + count, A_dims.end());
 
-            Tensor Atp = Tensor::build(A.T().type(), A.T().name() + " batch", dims);
+            Tensor Atp =
+                Tensor::build(A.T().type(), A.T().name() + " batch", dims);
             LabeledTensor At(Atp, indices, A.factor());
             rhs_batch.operator*(At);
             batch_tensors.push_back(Atp);
-        } else {
+        }
+        else
+        {
             rhs_batch.operator*(A);
             batch_tensors.push_back(A.T());
         }
     }
 
-    if (nterms == 2) {
+    if (nterms == 2)
+    {
         // Loop over batches to perform contraction
         std::vector<size_t> current_batch(batched_size, 0);
         shared_ptr<TensorImpl> A2;
         shared_ptr<TensorImpl> B2;
         shared_ptr<TensorImpl> C2;
-        while (current_batch[0] < slicing_dims[0]) {
+        while (current_batch[0] < slicing_dims[0])
+        {
             size_t L_shift = 0, cur_jump = 1;
             size_t sub_numel = Lt_batch.T().numel();
-            for (int i = batched_size - 1; i >= 0; --i) {
+            for (int i = batched_size - 1; i >= 0; --i)
+            {
                 L_shift += current_batch[i] * cur_jump;
                 cur_jump *= slicing_dims[i];
             }
             L_shift *= sub_numel;
-            std::vector<double>& Lt_batch_data = Ltp_batch.data();
-            std::vector<double>& Lt_data = Ltp.data();
-            std::memcpy(Lt_batch_data.data(), Lt_data.data()+L_shift, sub_numel * sizeof(double));
+            std::vector<double> &Lt_batch_data = Ltp_batch.data();
+            std::vector<double> &Lt_data = Ltp.data();
+            std::memcpy(Lt_batch_data.data(), Lt_data.data() + L_shift,
+                        sub_numel * sizeof(double));
 
-            for (size_t i = 0; i < nterms; ++i) {
-                if (need_slicing[i][batched_size]) {
+            for (size_t i = 0; i < nterms; ++i)
+            {
+                if (need_slicing[i][batched_size])
+                {
                     size_t cur_shift = 0, cur_jump = 1;
                     size_t sub_numel_A = batch_tensors[i].numel();
-                    for (int l = batched_size - 1; l >= 0; --l) {
-                        if (need_slicing[i][l]) {
+                    for (int l = batched_size - 1; l >= 0; --l)
+                    {
+                        if (need_slicing[i][l])
+                        {
                             cur_shift += current_batch[l] * cur_jump;
                             cur_jump *= slicing_dims[l];
                         }
                     }
                     cur_shift *= sub_numel_A;
-                    std::vector<double>& A_batch_data = batch_tensors[i].data();
-                    const std::vector<double>& A_data = rhsp[i].T().data();
-                    std::memcpy(A_batch_data.data(), A_data.data()+cur_shift, sub_numel_A * sizeof(double));
+                    std::vector<double> &A_batch_data = batch_tensors[i].data();
+                    const std::vector<double> &A_data = rhsp[i].T().data();
+                    std::memcpy(A_batch_data.data(), A_data.data() + cur_shift,
+                                sub_numel_A * sizeof(double));
                 }
             }
 
-            // The following code is identical to Lt_batch.contract(rhs_batch, zero_result, add);
+            // The following code is identical to Lt_batch.contract(rhs_batch,
+            // zero_result, add);
             const LabeledTensor &A = rhs_batch[0];
             const LabeledTensor &B = rhs_batch[1];
 
-            Ltp_batch.contract(batch_tensors[0], batch_tensors[1], sub_indices, A.indices(), B.indices(),
-                        A2, B2, C2,
-                        add ? A.factor() * B.factor() : -A.factor() * B.factor(),
-                        zero_result ? 0.0 : 1.0);
+            Ltp_batch.contract(batch_tensors[0], batch_tensors[1], sub_indices,
+                               A.indices(), B.indices(), A2, B2, C2,
+                               add ? A.factor() * B.factor()
+                                   : -A.factor() * B.factor(),
+                               zero_result ? 0.0 : 1.0);
 
             // Copy current batch tensor result to the full result tensor
-            const std::vector<double>& Ltc_batch_data = Ltp_batch.data();
-            std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), sub_numel * sizeof(double));
+            const std::vector<double> &Ltc_batch_data = Ltp_batch.data();
+            std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(),
+                        sub_numel * sizeof(double));
 
             // Determine the indices of next batch
-            for (int i = batched_size - 1; i >= 0; --i) {
+            for (int i = batched_size - 1; i >= 0; --i)
+            {
                 current_batch[i]++;
-                if (current_batch[i] < slicing_dims[i]) {
+                if (current_batch[i] < slicing_dims[i])
+                {
                     break;
-                } else if (i != 0) {
+                }
+                else if (i != 0)
+                {
                     current_batch[i] = 0;
                 }
             }
         }
-    } else {
+    }
+    else
+    {
         // Prepare batch contraction intermediate tensors.
         std::vector<Tensor> inter_tensors;
         std::vector<Indices> inter_indices;
@@ -721,7 +799,8 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
 
             for (size_t i = 0; i < AB_common_idx.size(); ++i)
             {
-                // If a common index is also found in the rhs it's a Hadamard index
+                // If a common index is also found in the rhs it's a Hadamard
+                // index
                 if (std::find(this->indices().begin(), this->indices().end(),
                               AB_common_idx[i]) != this->indices().end())
                 {
@@ -741,8 +820,8 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
                 indices.push_back(B_fix_idx[i]);
             }
 
-            Tensor tAB = Tensor::build(A.T().type(),
-                                       A.T().name() + " * " + B.T().name(), dims);
+            Tensor tAB = Tensor::build(
+                A.T().type(), A.T().name() + " * " + B.T().name(), dims);
             inter_tensors.push_back(tAB);
             inter_indices.push_back(indices);
 
@@ -751,69 +830,89 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
 
         // Loop over batches to perform contraction
         std::vector<size_t> current_batch(batched_size, 0);
-        std::vector<shared_ptr<TensorImpl>> A2s(maxn+1);
-        std::vector<shared_ptr<TensorImpl>> B2s(maxn+1);
-        std::vector<shared_ptr<TensorImpl>> C2s(maxn+1);
-        while (current_batch[0] < slicing_dims[0]) {
+        std::vector<shared_ptr<TensorImpl>> A2s(maxn + 1);
+        std::vector<shared_ptr<TensorImpl>> B2s(maxn + 1);
+        std::vector<shared_ptr<TensorImpl>> C2s(maxn + 1);
+        while (current_batch[0] < slicing_dims[0])
+        {
             size_t L_shift = 0, cur_jump = 1;
             size_t sub_numel = Lt_batch.T().numel();
-            for (int i = batched_size - 1; i >= 0; --i) {
+            for (int i = batched_size - 1; i >= 0; --i)
+            {
                 L_shift += current_batch[i] * cur_jump;
                 cur_jump *= slicing_dims[i];
             }
             L_shift *= sub_numel;
-            std::vector<double>& Lt_batch_data = Ltp_batch.data();
-            std::vector<double>& Lt_data = Ltp.data();
-            std::memcpy(Lt_batch_data.data(), Lt_data.data()+L_shift, sub_numel * sizeof(double));
+            std::vector<double> &Lt_batch_data = Ltp_batch.data();
+            std::vector<double> &Lt_data = Ltp.data();
+            std::memcpy(Lt_batch_data.data(), Lt_data.data() + L_shift,
+                        sub_numel * sizeof(double));
 
-            for (size_t i = 0; i < nterms; ++i) {
-                if (need_slicing[i][batched_size]) {
+            for (size_t i = 0; i < nterms; ++i)
+            {
+                if (need_slicing[i][batched_size])
+                {
                     size_t cur_shift = 0, cur_jump = 1;
                     size_t sub_numel_A = batch_tensors[i].numel();
-                    for (int l = batched_size - 1; l >= 0; --l) {
-                        if (need_slicing[i][l]) {
+                    for (int l = batched_size - 1; l >= 0; --l)
+                    {
+                        if (need_slicing[i][l])
+                        {
                             cur_shift += current_batch[l] * cur_jump;
                             cur_jump *= slicing_dims[l];
                         }
                     }
                     cur_shift *= sub_numel_A;
-                    std::vector<double>& A_batch_data = batch_tensors[i].data();
-                    const std::vector<double>& A_data = rhsp[i].T().data();
-                    std::memcpy(A_batch_data.data(), A_data.data()+cur_shift, sub_numel_A * sizeof(double));
+                    std::vector<double> &A_batch_data = batch_tensors[i].data();
+                    const std::vector<double> &A_data = rhsp[i].T().data();
+                    std::memcpy(A_batch_data.data(), A_data.data() + cur_shift,
+                                sub_numel_A * sizeof(double));
                 }
             }
 
-            // The following code is identical to Lt_batch.contract(rhs_batch, zero_result, add);
+            // The following code is identical to Lt_batch.contract(rhs_batch,
+            // zero_result, add);
             const LabeledTensor &A = rhs_batch[0];
             const LabeledTensor &B0 = rhs_batch[1];
             Tensor tAB = inter_tensors[0];
-            tAB.contract(batch_tensors[0], B0.T(), inter_indices[0], A.indices(), B0.indices(),
-                         A2s[0], B2s[0], C2s[0], A.factor() * B0.factor(), 0.0);
+            tAB.contract(batch_tensors[0], B0.T(), inter_indices[0],
+                         A.indices(), B0.indices(), A2s[0], B2s[0], C2s[0],
+                         A.factor() * B0.factor(), 0.0);
 
             for (int n = 1; n < maxn; ++n)
             {
                 const LabeledTensor &B = rhs_batch[n + 1];
-                inter_tensors[n].contract(inter_tensors[n-1], batch_tensors[n+1], inter_indices[n], inter_indices[n-1], B.indices(),
-                             A2s[n], B2s[n], C2s[n], B.factor(), 0.0);
+                inter_tensors[n].contract(
+                    inter_tensors[n - 1], batch_tensors[n + 1],
+                    inter_indices[n], inter_indices[n - 1], B.indices(), A2s[n],
+                    B2s[n], C2s[n], B.factor(), 0.0);
             }
             const LabeledTensor &B = rhs_batch[nterms - 1];
 
-            Ltp_batch.contract(inter_tensors[nterms-3], B.T(), sub_indices, inter_indices[nterms-3], B.indices(),
-                        A2s[maxn], B2s[maxn], C2s[maxn], add ? B.factor() : -B.factor(),
-                        zero_result ? 0.0 : 1.0);
-            // The intermediate tensors in this contraction should be reused for all the batches
-            // to avoid recreating the intermediate of the same size multiple times.
+            Ltp_batch.contract(inter_tensors[nterms - 3], B.T(), sub_indices,
+                               inter_indices[nterms - 3], B.indices(),
+                               A2s[maxn], B2s[maxn], C2s[maxn],
+                               add ? B.factor() : -B.factor(),
+                               zero_result ? 0.0 : 1.0);
+            // The intermediate tensors in this contraction should be reused for
+            // all the batches to avoid recreating the intermediate of the same
+            // size multiple times.
 
             // Copy current batch tensor result to the full result tensor
-            const std::vector<double>& Ltc_batch_data = Ltp_batch.data();
-            std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), sub_numel * sizeof(double));
+            const std::vector<double> &Ltc_batch_data = Ltp_batch.data();
+            std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(),
+                        sub_numel * sizeof(double));
 
             // Determine the indices of next batch
-            for (int i = batched_size - 1; i >= 0; --i) {
+            for (int i = batched_size - 1; i >= 0; --i)
+            {
                 current_batch[i]++;
-                if (current_batch[i] < slicing_dims[i]) {
+                if (current_batch[i] < slicing_dims[i])
+                {
                     break;
-                } else if (i != 0) {
+                }
+                else if (i != 0)
+                {
                     current_batch[i] = 0;
                 }
             }
@@ -824,4 +923,4 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_
     T_.permute(Ltp, indices_, permuted_indices);
 }
 
-}
+} // namespace ambit
diff --git a/src/tensor/tensor.cc b/src/tensor/tensor.cc
index f307ead..bbfadd6 100644
--- a/src/tensor/tensor.cc
+++ b/src/tensor/tensor.cc
@@ -37,6 +37,7 @@
 #include "tensorimpl.h"
 #include <ambit/print.h>
 #include <ambit/tensor.h>
+#include <ambit/tensor_pool.h>
 
 #include "globals.h"
 
@@ -90,6 +91,7 @@ void common_initialize(int /*argc*/, char *const * /*argv*/)
     settings::ninitialized++;
 
     timer::initialize();
+    tensor_pool::initialize();
 
     // Set the scratch path for disk files
     const char *scratch_env = std::getenv("TENSOR_SCRATCH");
@@ -132,6 +134,7 @@ void finalize()
 
     timer::report();
     timer::finalize();
+    tensor_pool::finalize();
 }
 
 void barrier()
diff --git a/src/tensor/tensor_pool.cc b/src/tensor/tensor_pool.cc
index 008bb05..7ef90fb 100644
--- a/src/tensor/tensor_pool.cc
+++ b/src/tensor/tensor_pool.cc
@@ -27,40 +27,25 @@
  * @END LICENSE
  */
 
-//#include <stdexcept>
-//#include <string>
-//#include <vector>
-
-//#include "macros.h"
-
 #include <mutex> // std::mutex
 #include <numeric>
 
 #include <ambit/tensor_pool.h>
 
-//#if defined(__clang__)
-//#pragma clang diagnostic push
-//#pragma clang diagnostic ignored "-Wunused-parameter"
-//#endif
-
 namespace ambit
 {
 
 std::mutex pool_mutex; // mutex for critical section
 
-TempTensor TensorPool::get_tensor(TensorType type, const string &name,
-                                  const Dimension &dims)
+TempTensor TensorPool::get_tensor()
 {
     // critical section
     pool_mutex.lock();
 
-    size_t target_numel =
-        std::accumulate(dims.begin(), dims.end(), static_cast<size_t>(1),
-                        std::multiplies<size_t>());
     Tensor t;
     size_t id;
 
-    // Find a free tensor of the appropriate size
+    // Find a free tensor
     bool found = false;
     for (size_t n = 0; n < tensor_pool_.size(); n++)
     {
@@ -70,8 +55,6 @@ TempTensor TensorPool::get_tensor(TensorType type, const string &name,
         {
             // lock the tensor
             p.first = true;
-            // resize the tensor
-            p.second.resize(dims);
             // grab the Tensor object and set the id
             t = p.second;
             id = n;
@@ -79,11 +62,13 @@ TempTensor TensorPool::get_tensor(TensorType type, const string &name,
             break;
         }
     }
-    // If there is no tensor, create one
+    // If there is no tensor, create a core tensor with appropriate label and
+    // dimensions [1].
     if (not found)
     {
-        t = Tensor::build(type, name, dims);
         id = tensor_pool_.size();
+        t = Tensor::build(TensorType::CoreTensor,
+                          "temporary_tensor_" + std::to_string(id), {1});
         tensor_pool_.push_back(std::make_pair(true, t));
     }
     pool_mutex.unlock();
@@ -99,4 +84,33 @@ void TensorPool::release_tensor(size_t id)
 }
 
 void TensorPool::reset() { tensor_pool_.clear(); }
+
+namespace tensor_pool
+{
+
+TensorPool *tensor_pool = nullptr;
+
+void initialize()
+{
+    if (tensor_pool == nullptr)
+    {
+        tensor_pool = new TensorPool();
+    }
+    else
+    {
+        throw std::runtime_error("tensor_pool::initialize: the TensorPool has "
+                                 "already been initialized.");
+    }
+}
+
+void finalize()
+{
+    delete tensor_pool;
+    tensor_pool = nullptr;
+}
+
+TempTensor get_tensor() { return tensor_pool->get_tensor(); }
+
+} // namespace tensor_pool
+
 } // namespace ambit
diff --git a/src/tensor/tensorimpl.h b/src/tensor/tensorimpl.h
index 9d9226f..18543bd 100644
--- a/src/tensor/tensorimpl.h
+++ b/src/tensor/tensorimpl.h
@@ -265,7 +265,7 @@ class TensorImpl
         }
     }
 
-    void resize(const Dimension &dims, bool trim = true)
+    virtual void resize(const Dimension &dims, bool trim = true)
     {
         throw std::runtime_error(
             "Operation not supported in this tensor implementation.");
diff --git a/src/tensor/timer.cc b/src/tensor/timer.cc
index 9eb9b59..be28926 100644
--- a/src/tensor/timer.cc
+++ b/src/tensor/timer.cc
@@ -109,11 +109,11 @@ void print_timer_info(TimerDetail *timer)
     char buffer[512];
     if (timer != root)
     {
-        snprintf(buffer, 512, "%lld ms : %lld calls : %lld ms per call : ",
-                 std::chrono::duration_cast<std::chrono::milliseconds>(
+        snprintf(buffer, 512, "%lld mus : %lld calls : %lld mus per call : ",
+                 std::chrono::duration_cast<std::chrono::microseconds>(
                      timer->total_time),
                  timer->total_calls,
-                 std::chrono::duration_cast<std::chrono::milliseconds>(
+                 std::chrono::duration_cast<std::chrono::microseconds>(
                      timer->total_time) /
                      timer->total_calls);
         print("%s%*s%s\n", buffer,