diff --git a/include/ambit/tensor.h b/include/ambit/tensor.h index cbe0c15..b73b96d 100644 --- a/include/ambit/tensor.h +++ b/include/ambit/tensor.h @@ -214,7 +214,7 @@ class Tensor * * Example successful use case: * Tensor A = Tensor::build(CoreTensor, "A3", {4,5,6}); - * A.at({0,0,0}) = 1.0; // Fill element + * A.at({0,0,0}) = 1.0; // Fill element * * Results: * @return reference to the element, if tensor object supports it @@ -517,6 +517,8 @@ class Tensor public: void reshape(const Dimension &dims); + void resize(const Dimension &dims); + // => Operator Overloading API <= // LabeledTensor operator()(const string &indices) const; diff --git a/include/ambit/tensor_pool.h b/include/ambit/tensor_pool.h new file mode 100644 index 0000000..aec789f --- /dev/null +++ b/include/ambit/tensor_pool.h @@ -0,0 +1,108 @@ +/* + * @BEGIN LICENSE + * + * ambit: C++ library for the implementation of tensor product calculations + * through a clean, concise user interface. + * + * Copyright (c) 2014-2017 Ambit developers. + * + * The copyrights for code used from other parties are included in + * the corresponding files. + * + * This file is part of ambit. + * + * Ambit is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3. + * + * Ambit is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with ambit; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * @END LICENSE + */ + +#if !defined(TENSOR_POOL_H) +#define TENSOR_POOL_H + +#include + +#include + +#if defined(__clang__) +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wunused-parameter" +#endif + +namespace ambit +{ + +class TempTensor; + +class TensorPool +{ + public: + // => Constructors <= // + + /** + * Returns a temporary Core tensor. This function is thread safe. + * + * Results: + * @return a TempTensor object that will automatically make the + * tensor available to the pool upon destruction. + **/ + TempTensor get_tensor(); + + /** + * Release a temporary Tensor + * + * Parameters: + * @param id the id of the Tensor + * + * Results: + * release a temporary tensor and make it available for a new call from + * get_tensor + **/ + void release_tensor(size_t id); + + /** + * Frees the TensorPool's internal memory allocation. + */ + void reset(); + + private: + std::vector> tensor_pool_; +}; + +class TempTensor +{ + public: + // => Constructor <= // + TempTensor(size_t id, Tensor t, TensorPool *tp) : id_(id), t_(t), tp_(tp) {} + ~TempTensor() { tp_->release_tensor(id_); } + Tensor tensor() { return t_; } + + private: + size_t id_; + Tensor t_; + TensorPool *tp_; +}; + +namespace tensor_pool +{ +void initialize(); +void finalize(); +TempTensor get_tensor(); +} // namespace tensor_pool +} // namespace ambit + +#if defined(__clang__) +#pragma clang diagnostic pop +#endif + +#endif diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 4693206..6e426c2 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -58,6 +58,7 @@ set(TENSOR_SOURCES tensor/slice.cc tensor/sliced_tensor.cc tensor/tensor.cc + tensor/tensor_pool.cc tensor/tensorimpl.cc tensor/timer.cc diff --git a/src/tensor/core/core.cc b/src/tensor/core/core.cc index 6bcc763..c9e558c 100644 --- a/src/tensor/core/core.cc +++ b/src/tensor/core/core.cc @@ -55,6 +55,21 @@ void CoreTensorImpl::reshape(const Dimension &dims) TensorImpl::reshape(dims); } +void CoreTensorImpl::resize(const Dimension &dims, bool trim) +{ + TensorImpl::reshape(dims); + // Requests that the vector capacity be at least enough to contain numel + // elements. If numel is greater than the current vector capacity, the + // function causes the container to reallocate its storage increasing its + // capacity to numel (or greater). + + if (numel() > data_.size()) + { + // TODO: implement trimming + data_.reserve(numel()); + } +} + double CoreTensorImpl::norm(int type) const { double val = 0.0; diff --git a/src/tensor/core/core.h b/src/tensor/core/core.h index 11830d4..8d314b4 100644 --- a/src/tensor/core/core.h +++ b/src/tensor/core/core.h @@ -45,6 +45,8 @@ class CoreTensorImpl : public TensorImpl // the strides in the slice codes. void reshape(const Dimension &dims); + void resize(const Dimension &dims, bool trim = true); + vector &data() { return data_; } const vector &data() const { return data_; } diff --git a/src/tensor/labeled_tensor.cc b/src/tensor/labeled_tensor.cc index d8b5ca7..1f667bf 100644 --- a/src/tensor/labeled_tensor.cc +++ b/src/tensor/labeled_tensor.cc @@ -20,20 +20,21 @@ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public License along - * with ambit; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * You should have received a copy of the GNU Lesser General Public License + * along with ambit; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. * * @END LICENSE */ -#include +#include "indices.h" +#include "tensorimpl.h" #include -#include #include -#include "tensorimpl.h" -#include "indices.h" +#include #include +#include +#include namespace ambit { @@ -135,7 +136,7 @@ LabeledTensor tensor_product_get_temp_AB(const LabeledTensor &A, Tensor::build(A.T().type(), A.T().name() + " * " + B.T().name(), dims); return T(indices::to_string(indices)); } -} +} // namespace void LabeledTensor::contract(const LabeledTensorContraction &rhs, bool zero_result, bool add, bool optimize_order) @@ -146,7 +147,8 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs, std::iota(perm.begin(), perm.end(), 0); std::pair best_cpu_memory_cost(1.0e200, 1.0e200); - if (optimize_order) { + if (optimize_order) + { do { std::pair cpu_memory_cost = @@ -159,10 +161,15 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs, } while (std::next_permutation(perm.begin(), perm.end())); // at this point 'best_perm' should be used to perform contraction in // optimal order. - } else { + } + else + { best_perm = perm; } + TempTensor tempTensor = tensor_pool::get_tensor(); + Tensor tAB = tempTensor.tensor(); + const LabeledTensor &Aref = rhs[best_perm[0]]; LabeledTensor A(Aref.T_, Aref.indices_, Aref.factor_); int maxn = int(nterms) - 2; @@ -200,8 +207,7 @@ void LabeledTensor::contract(const LabeledTensorContraction &rhs, indices.push_back(B_fix_idx[i]); } - Tensor tAB = Tensor::build(A.T().type(), - A.T().name() + " * " + B.T().name(), dims); + tAB.resize(dims); tAB.contract(A.T(), B.T(), indices, A.indices(), B.indices(), A.factor() * B.factor(), 0.0); @@ -479,12 +485,17 @@ LabeledTensorDistribution::operator double() const return C.data()[0]; } -LabeledTensorBatchedContraction batched(const string &batched_indices, const LabeledTensorContraction &contraction) { - return LabeledTensorBatchedContraction(contraction, indices::split(batched_indices)); +LabeledTensorBatchedContraction +batched(const string &batched_indices, + const LabeledTensorContraction &contraction) +{ + return LabeledTensorBatchedContraction(contraction, + indices::split(batched_indices)); } -void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_batched, - bool zero_result, bool add, bool optimize_order) +void LabeledTensor::contract_batched( + const LabeledTensorBatchedContraction &rhs_batched, bool zero_result, + bool add, bool optimize_order) { const LabeledTensorContraction &rhs = rhs_batched.get_contraction(); const Indices &batched_indices = rhs_batched.get_batched_indices(); @@ -492,19 +503,28 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ // Find the indices to be batched in result labeled tensor. size_t batched_size = batched_indices.size(); std::vector slicing_axis(batched_size); - for (size_t l = 0; l < batched_size; ++l) { - auto it = std::find(indices_.begin(), indices_.end(), batched_indices[l]); - if (it != indices_.end()) { + for (size_t l = 0; l < batched_size; ++l) + { + auto it = + std::find(indices_.begin(), indices_.end(), batched_indices[l]); + if (it != indices_.end()) + { slicing_axis[l] = std::distance(indices_.begin(), it); - } else { - throw std::runtime_error("Slicing indices do not exist in tensor indices."); + } + else + { + throw std::runtime_error( + "Slicing indices do not exist in tensor indices."); } } - // Determine if the result need permutation to permute the batched indices to the front + // Determine if the result need permutation to permute the batched indices + // to the front bool permute_flag = false; - for (size_t l = 0; l < batched_size; ++l) { - if (slicing_axis[l] != l) { + for (size_t l = 0; l < batched_size; ++l) + { + if (slicing_axis[l] != l) + { permute_flag = true; break; } @@ -513,39 +533,47 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ // Determine batched dimensions. Indices permuted_indices; Dimension slicing_dims; - for (const string& s : batched_indices) { + for (const string &s : batched_indices) + { slicing_dims.push_back(dim_by_index(s)); } // Permute result labeled tensor. Tensor Ltp; - if (permute_flag) { + if (permute_flag) + { permuted_indices = batched_indices; - for (size_t l = 0, l_max = numdim(); l < l_max; ++l) { - if (std::find(slicing_axis.begin(), slicing_axis.end(),l) == slicing_axis.end()) { + for (size_t l = 0, l_max = numdim(); l < l_max; ++l) + { + if (std::find(slicing_axis.begin(), slicing_axis.end(), l) == + slicing_axis.end()) + { permuted_indices.push_back(indices_[l]); } } Dimension dims; - for (const string& s : permuted_indices) { + for (const string &s : permuted_indices) + { dims.push_back(dim_by_index(s)); } Ltp = Tensor::build(T().type(), T().name() + " permute", dims); Ltp.permute(T_, permuted_indices, indices_); - } else { + } + else + { Ltp = T().clone(); permuted_indices = indices_; } LabeledTensor Lt(Ltp, permuted_indices); - size_t nterms = rhs.size(); std::vector perm(nterms); std::vector best_perm(nterms); std::iota(perm.begin(), perm.end(), 0); std::pair best_cpu_memory_cost(1.0e200, 1.0e200); - if (optimize_order) { + if (optimize_order) + { do { std::pair cpu_memory_cost = @@ -558,45 +586,66 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ } while (std::next_permutation(perm.begin(), perm.end())); // at this point 'best_perm' should be used to perform contraction in // optimal order. - } else { + } + else + { best_perm = perm; } - std::vector> need_slicing(nterms, std::vector(batched_size + 1)); + std::vector> need_slicing( + nterms, std::vector(batched_size + 1)); // Permute tensor indices if the corresponding tensor needs to be batched. LabeledTensorContraction rhsp; - for (size_t i = 0; i < nterms; ++i) { - const LabeledTensor& A = rhs[best_perm[i]]; - const Indices& A_indices = A.indices(); + for (size_t i = 0; i < nterms; ++i) + { + const LabeledTensor &A = rhs[best_perm[i]]; + const Indices &A_indices = A.indices(); Indices gemm_indices; - for (const string& s : A_indices) { - auto it = std::find(batched_indices.begin(), batched_indices.end(), s); - if (it != batched_indices.end()) { - need_slicing[i][std::distance(batched_indices.begin(), it)] = true; - } else { + for (const string &s : A_indices) + { + auto it = + std::find(batched_indices.begin(), batched_indices.end(), s); + if (it != batched_indices.end()) + { + need_slicing[i][std::distance(batched_indices.begin(), it)] = + true; + } + else + { gemm_indices.push_back(s); } } Indices permuted_indices; - for (size_t l = 0; l < batched_size; ++l) { - if (need_slicing[i][l]) { + for (size_t l = 0; l < batched_size; ++l) + { + if (need_slicing[i][l]) + { permuted_indices.push_back(batched_indices[l]); need_slicing[i][batched_size] = true; } } - if (permuted_indices.size() == 0) { + if (permuted_indices.size() == 0) + { rhsp.operator*(A); - } else { - permuted_indices.insert(permuted_indices.end(),gemm_indices.begin(),gemm_indices.end()); - if (permuted_indices == A_indices) { + } + else + { + permuted_indices.insert(permuted_indices.end(), + gemm_indices.begin(), gemm_indices.end()); + if (permuted_indices == A_indices) + { rhsp.operator*(A); - } else { + } + else + { Dimension dims; - for (const string& s : permuted_indices) { + for (const string &s : permuted_indices) + { dims.push_back(A.dim_by_index(s)); } - Tensor Atp = Tensor::build(A.T().type(), A.T().name() + " permute", dims); + Tensor Atp = Tensor::build(A.T().type(), + A.T().name() + " permute", dims); Atp.permute(A.T(), permuted_indices, A.indices()); LabeledTensor At(Atp, permuted_indices, A.factor()); rhsp.operator*(At); @@ -605,103 +654,132 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ } // Create intermediate batch tensor for result tensor. - const Indices& Lt_indices = Lt.indices(); - const Dimension& Lt_dims = Lt.T().dims(); + const Indices &Lt_indices = Lt.indices(); + const Dimension &Lt_dims = Lt.T().dims(); Dimension sub_dims; Indices sub_indices; - sub_dims.insert(sub_dims.end(), Lt_dims.begin()+batched_size, Lt_dims.end()); - sub_indices.insert(sub_indices.end(), Lt_indices.begin()+batched_size, Lt_indices.end()); - Tensor Ltp_batch = Tensor::build(Lt.T().type(), Lt.T().name() + " batch", sub_dims); + sub_dims.insert(sub_dims.end(), Lt_dims.begin() + batched_size, + Lt_dims.end()); + sub_indices.insert(sub_indices.end(), Lt_indices.begin() + batched_size, + Lt_indices.end()); + Tensor Ltp_batch = + Tensor::build(Lt.T().type(), Lt.T().name() + " batch", sub_dims); LabeledTensor Lt_batch(Ltp_batch, sub_indices); // Create intermediate batch tensors for tensors to be contracted. LabeledTensorContraction rhs_batch; std::vector batch_tensors; - for (size_t i = 0; i < nterms; ++i) { - const LabeledTensor& A = rhsp[i]; - if (need_slicing[i][batched_size]) { - const Indices& A_indices = A.indices(); - const Dimension& A_dims = A.T().dims(); + for (size_t i = 0; i < nterms; ++i) + { + const LabeledTensor &A = rhsp[i]; + if (need_slicing[i][batched_size]) + { + const Indices &A_indices = A.indices(); + const Dimension &A_dims = A.T().dims(); Dimension dims; Indices indices; size_t count = 0; - for (size_t l = 0; l < batched_size; ++l) { - if (need_slicing[i][l]) { + for (size_t l = 0; l < batched_size; ++l) + { + if (need_slicing[i][l]) + { count++; } } - indices.insert(indices.end(), A_indices.begin()+count, A_indices.end()); - dims.insert(dims.end(), A_dims.begin()+count, A_dims.end()); + indices.insert(indices.end(), A_indices.begin() + count, + A_indices.end()); + dims.insert(dims.end(), A_dims.begin() + count, A_dims.end()); - Tensor Atp = Tensor::build(A.T().type(), A.T().name() + " batch", dims); + Tensor Atp = + Tensor::build(A.T().type(), A.T().name() + " batch", dims); LabeledTensor At(Atp, indices, A.factor()); rhs_batch.operator*(At); batch_tensors.push_back(Atp); - } else { + } + else + { rhs_batch.operator*(A); batch_tensors.push_back(A.T()); } } - if (nterms == 2) { + if (nterms == 2) + { // Loop over batches to perform contraction std::vector current_batch(batched_size, 0); shared_ptr A2; shared_ptr B2; shared_ptr C2; - while (current_batch[0] < slicing_dims[0]) { + while (current_batch[0] < slicing_dims[0]) + { size_t L_shift = 0, cur_jump = 1; size_t sub_numel = Lt_batch.T().numel(); - for (int i = batched_size - 1; i >= 0; --i) { + for (int i = batched_size - 1; i >= 0; --i) + { L_shift += current_batch[i] * cur_jump; cur_jump *= slicing_dims[i]; } L_shift *= sub_numel; - std::vector& Lt_batch_data = Ltp_batch.data(); - std::vector& Lt_data = Ltp.data(); - std::memcpy(Lt_batch_data.data(), Lt_data.data()+L_shift, sub_numel * sizeof(double)); + std::vector &Lt_batch_data = Ltp_batch.data(); + std::vector &Lt_data = Ltp.data(); + std::memcpy(Lt_batch_data.data(), Lt_data.data() + L_shift, + sub_numel * sizeof(double)); - for (size_t i = 0; i < nterms; ++i) { - if (need_slicing[i][batched_size]) { + for (size_t i = 0; i < nterms; ++i) + { + if (need_slicing[i][batched_size]) + { size_t cur_shift = 0, cur_jump = 1; size_t sub_numel_A = batch_tensors[i].numel(); - for (int l = batched_size - 1; l >= 0; --l) { - if (need_slicing[i][l]) { + for (int l = batched_size - 1; l >= 0; --l) + { + if (need_slicing[i][l]) + { cur_shift += current_batch[l] * cur_jump; cur_jump *= slicing_dims[l]; } } cur_shift *= sub_numel_A; - std::vector& A_batch_data = batch_tensors[i].data(); - const std::vector& A_data = rhsp[i].T().data(); - std::memcpy(A_batch_data.data(), A_data.data()+cur_shift, sub_numel_A * sizeof(double)); + std::vector &A_batch_data = batch_tensors[i].data(); + const std::vector &A_data = rhsp[i].T().data(); + std::memcpy(A_batch_data.data(), A_data.data() + cur_shift, + sub_numel_A * sizeof(double)); } } - // The following code is identical to Lt_batch.contract(rhs_batch, zero_result, add); + // The following code is identical to Lt_batch.contract(rhs_batch, + // zero_result, add); const LabeledTensor &A = rhs_batch[0]; const LabeledTensor &B = rhs_batch[1]; - Ltp_batch.contract(batch_tensors[0], batch_tensors[1], sub_indices, A.indices(), B.indices(), - A2, B2, C2, - add ? A.factor() * B.factor() : -A.factor() * B.factor(), - zero_result ? 0.0 : 1.0); + Ltp_batch.contract(batch_tensors[0], batch_tensors[1], sub_indices, + A.indices(), B.indices(), A2, B2, C2, + add ? A.factor() * B.factor() + : -A.factor() * B.factor(), + zero_result ? 0.0 : 1.0); // Copy current batch tensor result to the full result tensor - const std::vector& Ltc_batch_data = Ltp_batch.data(); - std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), sub_numel * sizeof(double)); + const std::vector &Ltc_batch_data = Ltp_batch.data(); + std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), + sub_numel * sizeof(double)); // Determine the indices of next batch - for (int i = batched_size - 1; i >= 0; --i) { + for (int i = batched_size - 1; i >= 0; --i) + { current_batch[i]++; - if (current_batch[i] < slicing_dims[i]) { + if (current_batch[i] < slicing_dims[i]) + { break; - } else if (i != 0) { + } + else if (i != 0) + { current_batch[i] = 0; } } } - } else { + } + else + { // Prepare batch contraction intermediate tensors. std::vector inter_tensors; std::vector inter_indices; @@ -721,7 +799,8 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ for (size_t i = 0; i < AB_common_idx.size(); ++i) { - // If a common index is also found in the rhs it's a Hadamard index + // If a common index is also found in the rhs it's a Hadamard + // index if (std::find(this->indices().begin(), this->indices().end(), AB_common_idx[i]) != this->indices().end()) { @@ -741,8 +820,8 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ indices.push_back(B_fix_idx[i]); } - Tensor tAB = Tensor::build(A.T().type(), - A.T().name() + " * " + B.T().name(), dims); + Tensor tAB = Tensor::build( + A.T().type(), A.T().name() + " * " + B.T().name(), dims); inter_tensors.push_back(tAB); inter_indices.push_back(indices); @@ -751,69 +830,89 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ // Loop over batches to perform contraction std::vector current_batch(batched_size, 0); - std::vector> A2s(maxn+1); - std::vector> B2s(maxn+1); - std::vector> C2s(maxn+1); - while (current_batch[0] < slicing_dims[0]) { + std::vector> A2s(maxn + 1); + std::vector> B2s(maxn + 1); + std::vector> C2s(maxn + 1); + while (current_batch[0] < slicing_dims[0]) + { size_t L_shift = 0, cur_jump = 1; size_t sub_numel = Lt_batch.T().numel(); - for (int i = batched_size - 1; i >= 0; --i) { + for (int i = batched_size - 1; i >= 0; --i) + { L_shift += current_batch[i] * cur_jump; cur_jump *= slicing_dims[i]; } L_shift *= sub_numel; - std::vector& Lt_batch_data = Ltp_batch.data(); - std::vector& Lt_data = Ltp.data(); - std::memcpy(Lt_batch_data.data(), Lt_data.data()+L_shift, sub_numel * sizeof(double)); + std::vector &Lt_batch_data = Ltp_batch.data(); + std::vector &Lt_data = Ltp.data(); + std::memcpy(Lt_batch_data.data(), Lt_data.data() + L_shift, + sub_numel * sizeof(double)); - for (size_t i = 0; i < nterms; ++i) { - if (need_slicing[i][batched_size]) { + for (size_t i = 0; i < nterms; ++i) + { + if (need_slicing[i][batched_size]) + { size_t cur_shift = 0, cur_jump = 1; size_t sub_numel_A = batch_tensors[i].numel(); - for (int l = batched_size - 1; l >= 0; --l) { - if (need_slicing[i][l]) { + for (int l = batched_size - 1; l >= 0; --l) + { + if (need_slicing[i][l]) + { cur_shift += current_batch[l] * cur_jump; cur_jump *= slicing_dims[l]; } } cur_shift *= sub_numel_A; - std::vector& A_batch_data = batch_tensors[i].data(); - const std::vector& A_data = rhsp[i].T().data(); - std::memcpy(A_batch_data.data(), A_data.data()+cur_shift, sub_numel_A * sizeof(double)); + std::vector &A_batch_data = batch_tensors[i].data(); + const std::vector &A_data = rhsp[i].T().data(); + std::memcpy(A_batch_data.data(), A_data.data() + cur_shift, + sub_numel_A * sizeof(double)); } } - // The following code is identical to Lt_batch.contract(rhs_batch, zero_result, add); + // The following code is identical to Lt_batch.contract(rhs_batch, + // zero_result, add); const LabeledTensor &A = rhs_batch[0]; const LabeledTensor &B0 = rhs_batch[1]; Tensor tAB = inter_tensors[0]; - tAB.contract(batch_tensors[0], B0.T(), inter_indices[0], A.indices(), B0.indices(), - A2s[0], B2s[0], C2s[0], A.factor() * B0.factor(), 0.0); + tAB.contract(batch_tensors[0], B0.T(), inter_indices[0], + A.indices(), B0.indices(), A2s[0], B2s[0], C2s[0], + A.factor() * B0.factor(), 0.0); for (int n = 1; n < maxn; ++n) { const LabeledTensor &B = rhs_batch[n + 1]; - inter_tensors[n].contract(inter_tensors[n-1], batch_tensors[n+1], inter_indices[n], inter_indices[n-1], B.indices(), - A2s[n], B2s[n], C2s[n], B.factor(), 0.0); + inter_tensors[n].contract( + inter_tensors[n - 1], batch_tensors[n + 1], + inter_indices[n], inter_indices[n - 1], B.indices(), A2s[n], + B2s[n], C2s[n], B.factor(), 0.0); } const LabeledTensor &B = rhs_batch[nterms - 1]; - Ltp_batch.contract(inter_tensors[nterms-3], B.T(), sub_indices, inter_indices[nterms-3], B.indices(), - A2s[maxn], B2s[maxn], C2s[maxn], add ? B.factor() : -B.factor(), - zero_result ? 0.0 : 1.0); - // The intermediate tensors in this contraction should be reused for all the batches - // to avoid recreating the intermediate of the same size multiple times. + Ltp_batch.contract(inter_tensors[nterms - 3], B.T(), sub_indices, + inter_indices[nterms - 3], B.indices(), + A2s[maxn], B2s[maxn], C2s[maxn], + add ? B.factor() : -B.factor(), + zero_result ? 0.0 : 1.0); + // The intermediate tensors in this contraction should be reused for + // all the batches to avoid recreating the intermediate of the same + // size multiple times. // Copy current batch tensor result to the full result tensor - const std::vector& Ltc_batch_data = Ltp_batch.data(); - std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), sub_numel * sizeof(double)); + const std::vector &Ltc_batch_data = Ltp_batch.data(); + std::memcpy(Lt_data.data() + L_shift, Ltc_batch_data.data(), + sub_numel * sizeof(double)); // Determine the indices of next batch - for (int i = batched_size - 1; i >= 0; --i) { + for (int i = batched_size - 1; i >= 0; --i) + { current_batch[i]++; - if (current_batch[i] < slicing_dims[i]) { + if (current_batch[i] < slicing_dims[i]) + { break; - } else if (i != 0) { + } + else if (i != 0) + { current_batch[i] = 0; } } @@ -824,4 +923,4 @@ void LabeledTensor::contract_batched(const LabeledTensorBatchedContraction &rhs_ T_.permute(Ltp, indices_, permuted_indices); } -} +} // namespace ambit diff --git a/src/tensor/tensor.cc b/src/tensor/tensor.cc index cb68f3b..bbfadd6 100644 --- a/src/tensor/tensor.cc +++ b/src/tensor/tensor.cc @@ -37,6 +37,7 @@ #include "tensorimpl.h" #include #include +#include #include "globals.h" @@ -90,6 +91,7 @@ void common_initialize(int /*argc*/, char *const * /*argv*/) settings::ninitialized++; timer::initialize(); + tensor_pool::initialize(); // Set the scratch path for disk files const char *scratch_env = std::getenv("TENSOR_SCRATCH"); @@ -132,6 +134,7 @@ void finalize() timer::report(); timer::finalize(); + tensor_pool::finalize(); } void barrier() @@ -206,6 +209,8 @@ Tensor Tensor::clone(TensorType type) const void Tensor::reshape(const Dimension &dims) { tensor_->reshape(dims); } +void Tensor::resize(const Dimension &dims) { tensor_->resize(dims); } + void Tensor::copy(const Tensor &other) { tensor_->copy(other.tensor_.get()); } Tensor::Tensor() {} diff --git a/src/tensor/tensor_pool.cc b/src/tensor/tensor_pool.cc new file mode 100644 index 0000000..7ef90fb --- /dev/null +++ b/src/tensor/tensor_pool.cc @@ -0,0 +1,116 @@ +/* + * @BEGIN LICENSE + * + * ambit: C++ library for the implementation of tensor product calculations + * through a clean, concise user interface. + * + * Copyright (c) 2014-2017 Ambit developers. + * + * The copyrights for code used from other parties are included in + * the corresponding files. + * + * This file is part of ambit. + * + * Ambit is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as published by + * the Free Software Foundation, version 3. + * + * Ambit is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with ambit; if not, write to the Free Software Foundation, Inc., 51 + * Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + * + * @END LICENSE + */ + +#include // std::mutex +#include + +#include + +namespace ambit +{ + +std::mutex pool_mutex; // mutex for critical section + +TempTensor TensorPool::get_tensor() +{ + // critical section + pool_mutex.lock(); + + Tensor t; + size_t id; + + // Find a free tensor + bool found = false; + for (size_t n = 0; n < tensor_pool_.size(); n++) + { + auto &p = tensor_pool_[n]; + // check if the busy flag is false + if (p.first == false) + { + // lock the tensor + p.first = true; + // grab the Tensor object and set the id + t = p.second; + id = n; + found = true; + break; + } + } + // If there is no tensor, create a core tensor with appropriate label and + // dimensions [1]. + if (not found) + { + id = tensor_pool_.size(); + t = Tensor::build(TensorType::CoreTensor, + "temporary_tensor_" + std::to_string(id), {1}); + tensor_pool_.push_back(std::make_pair(true, t)); + } + pool_mutex.unlock(); + return TempTensor(id, t, this); +} + +void TensorPool::release_tensor(size_t id) +{ + // critical section + pool_mutex.lock(); + tensor_pool_[id].first = false; + pool_mutex.unlock(); +} + +void TensorPool::reset() { tensor_pool_.clear(); } + +namespace tensor_pool +{ + +TensorPool *tensor_pool = nullptr; + +void initialize() +{ + if (tensor_pool == nullptr) + { + tensor_pool = new TensorPool(); + } + else + { + throw std::runtime_error("tensor_pool::initialize: the TensorPool has " + "already been initialized."); + } +} + +void finalize() +{ + delete tensor_pool; + tensor_pool = nullptr; +} + +TempTensor get_tensor() { return tensor_pool->get_tensor(); } + +} // namespace tensor_pool + +} // namespace ambit diff --git a/src/tensor/tensorimpl.h b/src/tensor/tensorimpl.h index d6ba0c6..18543bd 100644 --- a/src/tensor/tensorimpl.h +++ b/src/tensor/tensorimpl.h @@ -30,6 +30,7 @@ #if !defined(TENSOR_TENSORIMPL_H) #define TENSOR_TENSORIMPL_H +#include #include #include #include @@ -254,6 +255,9 @@ class TensorImpl void reshape(const Dimension &dims) { dims_ = dims; + numel_ = + std::accumulate(dims_.begin(), dims_.end(), static_cast(1), + std::multiplies()); addressing_ = Dimension(dims_.size(), 1); for (int n = static_cast(dims_.size()) - 2; n >= 0; --n) { @@ -261,6 +265,12 @@ class TensorImpl } } + virtual void resize(const Dimension &dims, bool trim = true) + { + throw std::runtime_error( + "Operation not supported in this tensor implementation."); + } + protected: static bool typeCheck(TensorType type, ConstTensorImplPtr A, bool throwIfDiff = true); diff --git a/src/tensor/timer.cc b/src/tensor/timer.cc index 9eb9b59..be28926 100644 --- a/src/tensor/timer.cc +++ b/src/tensor/timer.cc @@ -109,11 +109,11 @@ void print_timer_info(TimerDetail *timer) char buffer[512]; if (timer != root) { - snprintf(buffer, 512, "%lld ms : %lld calls : %lld ms per call : ", - std::chrono::duration_cast( + snprintf(buffer, 512, "%lld mus : %lld calls : %lld mus per call : ", + std::chrono::duration_cast( timer->total_time), timer->total_calls, - std::chrono::duration_cast( + std::chrono::duration_cast( timer->total_time) / timer->total_calls); print("%s%*s%s\n", buffer,