MPS with cuQuantum #2168

Open

wants to merge 30 commits into main from mps-cutensor

Commits (30)
563ae6e  initial layout (MozammilQ, Jun 5, 2024)
280f868  refactor code (MozammilQ, Jun 5, 2024)
ae44c69  refactor code (MozammilQ, Jun 6, 2024)
5b48265  refactor code (MozammilQ, Jun 6, 2024)
517a554  refactor code (MozammilQ, Jun 6, 2024)
7e40588  refactor code (MozammilQ, Jun 6, 2024)
ebf9ca0  refactor code (MozammilQ, Jun 6, 2024)
a422690  refactor code (MozammilQ, Jun 6, 2024)
80b59d5  refactor code (MozammilQ, Jun 6, 2024)
52f1ed4  refactor code (MozammilQ, Jun 7, 2024)
ed43e71  refactor code (MozammilQ, Jun 9, 2024)
649a5d7  refactor code (MozammilQ, Jun 9, 2024)
83e4b5e  Merge branch 'main' into mps-cutensor (doichanj, Jun 10, 2024)
c33571f  refactor code (MozammilQ, Jun 11, 2024)
abc5552  Merge branch 'main' into mps-cutensor (doichanj, Jun 14, 2024)
f0205e3  refactor code (MozammilQ, Jun 14, 2024)
629f65f  refactor code (MozammilQ, Jun 15, 2024)
644a822  added release note (MozammilQ, Jun 16, 2024)
e6f2288  refactor code (MozammilQ, Jun 17, 2024)
42f983e  Merge branch 'Qiskit:main' into mps-cutensor (MozammilQ, Jun 17, 2024)
c24b9e2  refactor code (MozammilQ, Jun 18, 2024)
34e9502  refactor code (MozammilQ, Jun 18, 2024)
00f88e9  refactor code; included test (MozammilQ, Jun 18, 2024)
454f8c0  lint (MozammilQ, Jun 18, 2024)
985c7f2  added suggestion (MozammilQ, Jun 18, 2024)
7ffab7d  Merge branch 'main' into mps-cutensor (doichanj, Jul 4, 2024)
6b0b41d  Merge branch 'main' into mps-cutensor (MozammilQ, Aug 30, 2024)
34a5e75  fixed a typo (MozammilQ, Aug 31, 2024)
a1ae308  refactor code (MozammilQ, Sep 10, 2024)
859e946  Merge branch 'Qiskit:main' into mps-cutensor (MozammilQ, Oct 4, 2024)
40 changes: 40 additions & 0 deletions releasenotes/notes/mps-svd-with-cuquantum-c0392854d1f373e0.yaml
@@ -0,0 +1,40 @@
---
features:
- |
This PR adds the ability to run matrix product state simulation on NVIDIA GPUs.
To be precise, it offloads the singular value decomposition (SVD) required by
the matrix product state method to NVIDIA GPUs with the help of cuQuantum.

Users configure the matrix product state backend as usual, and can now also
set the device to GPU.

Example:

.. code-block:: python

from qiskit_aer import AerSimulator
from qiskit.circuit import QuantumCircuit
from qiskit.compiler import transpile

num_qubits = 10
shots = 5

qc = QuantumCircuit(num_qubits)
qc.h(0)

for control, target in zip(range(num_qubits-1), range(1, num_qubits)):
qc.cx(control, target)

qc.measure_all()

sim = AerSimulator(method="matrix_product_state", device="GPU")
qc_t = transpile(qc, backend=sim)
job = sim.run(qc_t, shots=shots)

counts = job.result().get_counts()
print(counts)

13 changes: 13 additions & 0 deletions src/simulators/matrix_product_state/matrix_product_state.hpp
@@ -360,6 +360,19 @@ void State::set_config(const Config &config) {

// Set LAPACK SVD
MPS::set_mps_lapack_svd(config.mps_lapack);

// Set device for SVD
MPS::set_mps_svd_device(config.device);

// Get CUDA device, if GPU offloading enabled
if (config.device.compare("GPU") == 0) {
#ifdef AER_THRUST_CUDA
cudaDeviceProp prop;
int deviceId{-1};
HANDLE_CUDA_ERROR(cudaGetDevice(&deviceId));
HANDLE_CUDA_ERROR(cudaGetDeviceProperties(&prop, deviceId));
#endif // AER_THRUST_CUDA
}
}

void State::add_metadata(ExperimentResult &result) const {
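For reference, the `HANDLE_CUDA_ERROR` macro used above is not shown in this diff; it is defined elsewhere in Aer. A minimal sketch of the kind of error-check macro it is assumed to be (the name `CHECK_CUDA` here is hypothetical):

```cpp
// Hypothetical sketch; Aer's actual HANDLE_CUDA_ERROR may differ.
#include <cuda_runtime.h>
#include <stdexcept>
#include <string>

#define CHECK_CUDA(call)                                                    \
  do {                                                                      \
    cudaError_t err_ = (call);                                              \
    if (err_ != cudaSuccess) {                                              \
      throw std::runtime_error(std::string("CUDA error: ") +                \
                               cudaGetErrorString(err_));                   \
    }                                                                       \
  } while (0)
```

Querying the CUDA device with checked calls at configuration time makes a missing or unusable GPU fail fast in `set_config` rather than deep inside the simulation.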
src/simulators/matrix_product_state/matrix_product_state_internal.cpp
@@ -19,6 +19,7 @@
#include "stdlib.h"
#include "string.h"
#include <iostream>
#include <string>
#include <utility>

#include "framework/linalg/almost_equal.hpp"
@@ -45,6 +46,9 @@ double MPS::json_chop_threshold_ = 1E-8;
std::stringstream MPS::logging_str_;
bool MPS::mps_log_data_ = 0;
bool MPS::mps_lapack_ = false;
#ifdef AER_THRUST_CUDA
std::string MPS::mps_svd_device_;
#endif // AER_THRUST_CUDA

//------------------------------------------------------------------------
// local function declarations
@@ -663,8 +667,14 @@ void MPS::common_apply_2_qubit_gate(

MPS_Tensor left_gamma, right_gamma;
rvector_t lambda;
#ifdef AER_THRUST_CUDA
double discarded_value = MPS_Tensor::Decompose(
temp, left_gamma, lambda, right_gamma, MPS::mps_lapack_,
MPS::mps_svd_device_, cuda_stream, cutensor_handle);
#else
double discarded_value = MPS_Tensor::Decompose(temp, left_gamma, lambda,
right_gamma, MPS::mps_lapack_);
#endif // AER_THRUST_CUDA

if (discarded_value > json_chop_threshold_)
MPS::print_to_log("discarded_value=", discarded_value, ", ");
@@ -1803,7 +1813,18 @@ void MPS::initialize_from_matrix(uint_t num_qubits, const cmatrix_t &mat) {
// step 2 - SVD
S.clear();
S.resize(std::min(reshaped_matrix.GetRows(), reshaped_matrix.GetColumns()));

#ifdef AER_THRUST_CUDA
if (MPS::mps_svd_device_.compare("GPU") == 0) {
cutensor_csvd_wrapper(reshaped_matrix, U, S, V, cuda_stream,
cutensor_handle);
} else {
csvd_wrapper(reshaped_matrix, U, S, V, MPS::mps_lapack_);
}
#else
csvd_wrapper(reshaped_matrix, U, S, V, MPS::mps_lapack_);
#endif // AER_THRUST_CUDA

reduce_zeros(U, S, V, MPS_Tensor::get_max_bond_dimension(),
MPS_Tensor::get_truncation_threshold(), MPS::mps_lapack_);

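The same compile-time plus runtime SVD dispatch now appears twice in this file (in `common_apply_2_qubit_gate` and `initialize_from_matrix`). A hedged sketch of a helper that could centralize it; `svd_dispatch` is hypothetical, while `csvd_wrapper` and `cutensor_csvd_wrapper` are the wrappers this PR calls:

```cpp
// Hypothetical helper (not part of this PR) centralizing the repeated
// GPU/CPU SVD dispatch shown above.
#ifdef AER_THRUST_CUDA
static void svd_dispatch(cmatrix_t &A, cmatrix_t &U, rvector_t &S,
                         cmatrix_t &V, bool mps_lapack,
                         const std::string &mps_svd_device,
                         cudaStream_t &cuda_stream,
                         cutensornetHandle_t &cutensor_handle) {
  if (mps_svd_device.compare("GPU") == 0) {
    // Offload the SVD to the GPU through cuQuantum's cuTensorNet API.
    cutensor_csvd_wrapper(A, U, S, V, cuda_stream, cutensor_handle);
  } else {
    // Fall back to the existing CPU path (LAPACK or Aer's native SVD).
    csvd_wrapper(A, U, S, V, mps_lapack);
  }
}
#endif // AER_THRUST_CUDA
```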
src/simulators/matrix_product_state/matrix_product_state_internal.hpp
@@ -15,12 +15,12 @@
#ifndef _aer_matrix_product_state_hpp_
#define _aer_matrix_product_state_hpp_

#include "framework/json.hpp"
#include "framework/operations.hpp"
#include "framework/utils.hpp"
#include "matrix_product_state_tensor.hpp"
#include <cstdarg>
#include <string>

namespace AER {
namespace MatrixProductState {
@@ -81,7 +81,14 @@ enum class MPS_swap_direction { SWAP_LEFT, SWAP_RIGHT };

class MPS {
public:
MPS(uint_t num_qubits = 0) : num_qubits_(num_qubits) {
#ifdef AER_THRUST_CUDA
if (mps_svd_device_.compare("GPU") == 0) {
cudaStreamCreate(&cuda_stream);
cutensornetCreate(&cutensor_handle);
}
#endif // AER_THRUST_CUDA
}
~MPS() {}

//--------------------------------------------------------------------------
@@ -321,6 +328,9 @@
}

static void set_mps_lapack_svd(bool mps_lapack) { mps_lapack_ = mps_lapack; }
static void set_mps_svd_device(std::string mps_svd_device) {
mps_svd_device_ = mps_svd_device;
}

static uint_t get_omp_threads() { return omp_threads_; }
static uint_t get_omp_threshold() { return omp_threshold_; }
@@ -544,6 +554,11 @@
std::vector<MPS_Tensor> q_reg_;
std::vector<rvector_t> lambda_reg_;

#ifdef AER_THRUST_CUDA
cudaStream_t cuda_stream;
cutensornetHandle_t cutensor_handle;
#endif // AER_THRUST_CUDA

struct ordering {
// order_ stores the current ordering of the qubits,
// location_ stores the location of each qubit in the vector. It is derived
@@ -570,6 +585,7 @@
static bool mps_log_data_;
static MPS_swap_direction mps_swap_direction_;
static bool mps_lapack_;
static std::string mps_svd_device_;
};

inline std::ostream &operator<<(std::ostream &out, const rvector_t &vec) {
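One observation on the constructor above: it creates a CUDA stream and a cuTensorNet handle when the device is GPU, but the destructor shown (`~MPS() {}`) does not release them. A minimal sketch of the matching teardown, assuming the same members and guards (a suggestion, not code from this PR):

```cpp
// Hypothetical teardown matching the resources created in MPS();
// cudaStreamDestroy and cutensornetDestroy are the documented counterparts
// of cudaStreamCreate and cutensornetCreate.
~MPS() {
#ifdef AER_THRUST_CUDA
  if (mps_svd_device_.compare("GPU") == 0) {
    cutensornetDestroy(cutensor_handle);
    cudaStreamDestroy(cuda_stream);
  }
#endif // AER_THRUST_CUDA
}
```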
src/simulators/matrix_product_state/matrix_product_state_tensor.hpp
@@ -157,12 +157,22 @@
static MPS_Tensor contract(const MPS_Tensor &left_gamma,
const rvector_t &lambda,
const MPS_Tensor &right_gamma, bool mul_by_lambda);
#ifdef AER_THRUST_CUDA
static double Decompose(MPS_Tensor &temp, MPS_Tensor &left_gamma,
rvector_t &lambda, MPS_Tensor &right_gamma,
bool mps_lapack, std::string mps_svd_device,
cudaStream_t &cuda_stream,
cutensornetHandle_t &cutensor_handle);
#else
static double Decompose(MPS_Tensor &temp, MPS_Tensor &left_gamma,
rvector_t &lambda, MPS_Tensor &right_gamma,
bool mps_lapack);
#endif // AER_THRUST_CUDA

static void
reshape_for_3_qubits_before_SVD(const std::vector<cmatrix_t> &data,
MPS_Tensor &reshaped_tensor);

static void contract_2_dimensions(const MPS_Tensor &left_gamma,
const MPS_Tensor &right_gamma,
uint_t omp_threads, cmatrix_t &result);
@@ -591,15 +601,33 @@ void MPS_Tensor::contract_2_dimensions(const MPS_Tensor &left_gamma,
// rvector_t &lambda - tensors for the result.
// Returns: the discarded value from the SVD truncation.
//---------------------------------------------------------------
#ifdef AER_THRUST_CUDA
double MPS_Tensor::Decompose(MPS_Tensor &temp, MPS_Tensor &left_gamma,
rvector_t &lambda, MPS_Tensor &right_gamma,
bool mps_lapack, std::string mps_svd_device,
cudaStream_t &cuda_stream,
cutensornetHandle_t &cutensor_handle)
#else
double MPS_Tensor::Decompose(MPS_Tensor &temp, MPS_Tensor &left_gamma,
rvector_t &lambda, MPS_Tensor &right_gamma,
bool mps_lapack)
#endif // AER_THRUST_CUDA
{
cmatrix_t C;
C = reshape_before_SVD(temp.data_);
cmatrix_t U, V;
rvector_t S(std::min(C.GetRows(), C.GetColumns()));

#ifdef AER_THRUST_CUDA
if (mps_svd_device.compare("GPU") == 0) {
cutensor_csvd_wrapper(C, U, S, V, cuda_stream, cutensor_handle);
} else {
csvd_wrapper(C, U, S, V, mps_lapack);
}
#else
csvd_wrapper(C, U, S, V, mps_lapack);
#endif // AER_THRUST_CUDA

double discarded_value = 0.0;
discarded_value = reduce_zeros(U, S, V, max_bond_dimension_,
truncation_threshold_, mps_lapack);
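After the SVD, `reduce_zeros` truncates small singular values and the discarded weight becomes `Decompose`'s return value. An illustrative, self-contained sketch of such a truncation step (this is not Aer's `reduce_zeros`; the discarded weight is modeled here as the sum of squares of the dropped singular values):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative truncation: keep at most max_bond singular values, drop any
// trailing values below threshold, and return the discarded weight.
// Assumes s is sorted in descending order, as an SVD returns it.
double truncate_singular_values(std::vector<double> &s, double threshold,
                                std::size_t max_bond) {
  std::size_t keep = std::min(s.size(), max_bond);
  while (keep > 1 && s[keep - 1] < threshold)
    --keep;
  double discarded = 0.0;
  for (std::size_t i = keep; i < s.size(); ++i)
    discarded += s[i] * s[i];  // weight of the truncated singular values
  s.resize(keep);
  return discarded;
}
```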