From f9a834debc58f4036669b802ef65e88eab52de37 Mon Sep 17 00:00:00 2001
From: alexhold5 <87720137+alexhold5@users.noreply.github.com>
Date: Sun, 2 Feb 2025 10:23:32 -0500
Subject: [PATCH] Add files via upload

---
 moodys_challenge.ipynb | 695 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 664 insertions(+), 31 deletions(-)

diff --git a/moodys_challenge.ipynb b/moodys_challenge.ipynb
index 7ee0e87..91c9f70 100644
--- a/moodys_challenge.ipynb
+++ b/moodys_challenge.ipynb
@@ -161,15 +161,27 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
 "id": "4ec880cd",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "done\n"
+ ]
+ }
+ ],
 "source": [
+ "# load the S&P 500 closing prices and work with the log of the series\n",
+ "\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "\n",
- "time_series = np.log(pd.read_csv(\"SP500.csv\", header=None).to_numpy().squeeze())"
+ "time_series = np.log(pd.read_csv(\"sp500.csv\", header=None).to_numpy().squeeze())"
 ]
 },
 {
@@ -209,36 +221,93 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
 "id": "7b701be6",
 "metadata": {},
 "outputs": [],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "\n",
+ "def takens_embedding(time_series, N, d):\n",
+ "    \"\"\"\n",
+ "    We use Takens's embedding theorem, which states that we can\n",
+ "    reconstruct the topology and dynamical behavior of the global system\n",
+ "    from N-dimensional vectors of data points delayed by d steps.\n",
+ "    \"\"\"\n",
+ "    L = len(time_series)\n",
+ "    if L < (N - 1) * d + 1:\n",
+ "        raise ValueError(\"Time series is too short for given N and d.\")\n",
+ "    # list comprehension to build the delay-embedded vectors\n",
+ "    return np.array([time_series[i : i + (N - 1) * d + 1 : d] for i in range(L - (N - 1) * d)])\n",
+ "\n",
+ "\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 3,
 "id": "e0140903-54bb-42bd-9df1-ace45517b265",
 "metadata": {},
 "outputs": [],
 "source": [
 "def sliding_window(embedded_vectors, w):\n",
 "    \"\"\"\n",
 "    This returns a set of w-sized sequences of embedded vectors.\n",
 "    In total, this acts like a sliding window over the behavior of the system.\n",
 "    \"\"\"\n",
 "    M = embedded_vectors.shape[0]\n",
 "    K = M - w + 1  # Number of windows\n",
 "\n",
 "    if K <= 0:\n",
 "        raise ValueError(\"Window size w is too large for the given data.\")\n",
 "    # list comprehension to divide the previous 2-d array into submatrices\n",
 "    return np.array([embedded_vectors[i : i + w] for i in range(K)])\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 4,
 "id": "95c41a2c-23b4-412e-b4de-1b0f8f0758d5",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "Shape of embedded vectors: (81, 3)\n",
 "Shape of point cloud: (77, 5, 3)\n"
 ]
 }
 ],
 "source": [
 "# embedding dimension\n",
 "N = 3\n",
 "# time delay (1, since the data are sampled at evenly spaced discrete steps)\n",
 "d = 1\n",
 "# window length\n",
 "w = 5\n",
 "\n",
 "embedded_vectors = takens_embedding(time_series, N, d)\n",
 "\n",
 "# we construct our point cloud via this sliding window\n",
 "point_cloud = sliding_window(embedded_vectors, w)\n",
 "\n",
 "print(\"Shape of embedded vectors:\", embedded_vectors.shape)\n",
 "print(\"Shape of point cloud:\", point_cloud.shape)\n"
 ]
 },
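 {
 "cell_type": "code",
 "execution_count": null,
 "id": "takens-sanity-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): a minimal sanity\n",
 "# check of takens_embedding on a toy series whose delay vectors can be\n",
 "# written out by hand, before trusting it on the S&P 500 data.\n",
 "toy = np.arange(6)\n",
 "expected = np.array([[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]])\n",
 "assert np.array_equal(takens_embedding(toy, 3, 1), expected)\n",
 "# with delay d=2 the two rows are [0, 2, 4] and [1, 3, 5]\n",
 "assert takens_embedding(toy, 3, 2).shape == (2, 3)\n",
 "print(\"takens_embedding sanity checks passed\")\n"
 ]
 },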
\n", - " \n", - "BONUS EXERCISE: \n", + "def sliding_window(embedded_vectors, w):\n", + " \"\"\"\n", + " This returns a set of w-sized sequences of embedded vectors.\n", + " In total, this acts like a sliding window of the behavior of the system.\n", + " \"\"\"\n", + " M = embedded_vectors.shape[0]\n", + " K = M - w + 1 # Number of windows\n", + "\n", + " if K <= 0:\n", + " raise ValueError(\"Window size w is too large for the given data.\")\n", + " # list comprehension to divide the previous 2-d array into submatrices\n", + " return np.array([embedded_vectors[i : i + w] for i in range(K)])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "95c41a2c-23b4-412e-b4de-1b0f8f0758d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of embedded vectors: (81, 3)\n", + "Shape of point cloud: (77, 5, 3)\n" + ] + } + ], + "source": [ + "#embedding dimension (dimension of simplex)\n", + "N = 3\n", + "#time delay (1 cause discrete)\n", + "d = 1\n", + "#dimension of window\n", + "w = 5\n", "\n", - "`gtda.time_series.TakensEmbedding` can conduct this transformation. Try avoid using this function and build your own embedding function.\n", + "embedded_vectors = takens_embedding(time_series, N, d)\n", "\n", - "
" + "# we construct our point cloude via this sliding window\n", + "point_cloud = sliding_window(embedded_vectors, w)\n", + "\n", + "print(\"Shape of embedded vectors:\", embedded_vectors.shape)\n", + "print(\"Shape of point cloud:\", point_cloud.shape)\n" ] }, { "cell_type": "markdown", - "id": "9780b90c", + "id": "9376bd0b", "metadata": {}, "source": [ - "
 "NOTE:\n",
 "\n",
 "From step 2, we present an example originally provided in the appendix of [Khandelwal and Chandra's paper](https://arxiv.org/abs/2302.09553). This example can be used to verify your code.\n"
 ]
 },
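 {
 "cell_type": "code",
 "execution_count": null,
 "id": "periodic-embedding-illustration-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not the paper's appendix example, which is not reproduced\n",
 "# here): a qualitative check that the embedding behaves as the theory says.\n",
 "# A periodic signal should trace a closed loop after Takens embedding, which\n",
 "# is exactly the kind of feature the persistence pipeline below detects.\n",
 "theta = np.linspace(0, 4 * np.pi, 200)  # two full periods, ~100 samples each\n",
 "loop = takens_embedding(np.sin(theta), 2, 25)  # delay of ~ a quarter period\n",
 "# each embedded point is then close to (sin t, cos t), i.e. a circle\n",
 "radii = np.linalg.norm(loop - loop.mean(axis=0), axis=1)\n",
 "print(\"embedded loop shape:\", loop.shape)\n",
 "print(\"spread of radii (small for a clean loop):\", np.ptp(radii))\n"
 ]
 },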
\n", + " \n", + "BONUS EXERCISE: \n", "\n", - "From step 2, we present an example originally provided in the appendix of [Khandelwal's and Chandra's paper](https://arxiv.org/abs/2302.09553). This example can be used to verify your code.\n", + "`gtda.time_series.TakensEmbedding` can conduct this transformation. Try avoid using this function and build your own embedding function.\n", "\n", "
" ] @@ -273,12 +342,68 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "026d6b61", + "execution_count": 5, + "id": "efcda15b-3685-431a-91de-cb1929743103", "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "def sort_simplices(input_list):\n", + " \"\"\"\n", + " Organizes a list of simplices into groups based on dimension.\n", + " \n", + " Args:\n", + " - input_list (list of lists): A list containing simplices of different dimensions.\n", + " \n", + " Returns:\n", + " - A nested list: [[0-simplices], [1-simplices], [2-simplices], ...]\n", + " \"\"\"\n", + " # Sort elements into categories based on length\n", + " sorted_simplices = {}\n", + " \n", + " for simplex in input_list:\n", + " key = len(simplex) - 1 # Dimension of the simplex (0-based)\n", + " simplex_sorted = sorted(simplex) # Ensure order consistency\n", + " \n", + " if key not in sorted_simplices:\n", + " sorted_simplices[key] = []\n", + " \n", + " sorted_simplices[key].append(simplex_sorted)\n", + "\n", + " # Convert the dictionary into a list of lists, ensuring correct order\n", + " return [sorted_simplices.get(dim, []) for dim in sorted(sorted_simplices.keys())]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "026d6b61", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], + "source": [ + "# write your code here\n", + "import gudhi\n", + "\n", + "point_cloud = point_cloud.reshape(point_cloud.shape[0], -1)\n", + "\n", + "epsilon = 12\n", + "rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length = epsilon)\n", + "unfilt_simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)\n", + "filt_simplex_tree = list(unfilt_simplex_tree.get_filtration())\n", + "simplex_tree = []\n", + "for filtered_value in filt_simplex_tree:\n", + " simplex_tree.append(filtered_value[0])\n", + "\n", + "simplex_tree = sort_simplices(simplex_tree)" ] }, { @@ -337,12 +462,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "246e41ce", "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "# write your code here\n", + "import numpy as np\n", + "def generate_boundary_operator(simplex_tree, k):\n", + " num_k_simplicies = len(simplex_tree[k])\n", + " num_k1_simplicies = len(simplex_tree[k-1])\n", + " op = np.zeros((num_k_simplicies, num_k1_simplicies), dtype=int)\n", + " for i in range(num_k_simplicies):\n", + " simplex = simplex_tree[k][i]\n", + " row = [0] * num_k1_simplicies\n", + " sub_simplicies = [simplex[:j] + simplex[j + 1:] for j in range(len(simplex))]\n", + " sub_simplicies.sort()\n", + " for z,sub in enumerate(sub_simplicies):\n", + " index = simplex_tree[k-1].index(sub)\n", + " op[i,index] = (-1)**z\n", + "\n", + " return op.T" ] }, { @@ -423,12 +563,128 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8f818f78", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unitary Matrix:\n", + " [[-0.03097503-0.99952016j 0. +0.j 0. +0.j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j -0.03097503-0.99952016j 0. +0.j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j 0. +0.j -0.03097503-0.99952016j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " ...\n", + " [ 0. +0.j 0. +0.j 0. +0.j\n", + " ... 0.95507364-0.29636858j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j 0. +0.j 0. +0.j\n", + " ... 0. 
@@ -423,12 +563,128 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
 "id": "8f818f78",
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unitary Matrix:\n",
+ " [[-0.03097503-0.99952016j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j -0.03097503-0.99952016j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j -0.03097503-0.99952016j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " ...\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0.95507364-0.29636858j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0.95507364-0.29636858j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0.95507364-0.29636858j]]\n",
+ "Is Unitary: True\n",
+ "Instruction(name='unitary', num_qubits=12, num_clbits=0, params=[array([[-0.03097503-0.99952016j, 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , -0.03097503-0.99952016j,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " -0.03097503-0.99952016j, ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " ...,\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0.95507364-0.29636858j,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0.95507364-0.29636858j, 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0.95507364-0.29636858j]])])\n"
+ ]
+ }
+ ],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "import numpy as np\n",
+ "import scipy.linalg\n",
+ "from scipy.sparse import csr_matrix\n",
+ "\n",
+ "def need_padding(laplacian: np.ndarray):\n",
+ "    size = laplacian.shape[0]\n",
+ "    if (size & (size - 1)) == 0:\n",
+ "        return False, size\n",
+ "\n",
+ "    next_power_of_2 = 2**(size - 1).bit_length()\n",
+ "    return True, next_power_of_2\n",
+ "\n",
+ "def build_laplacian(simplex_tree):\n",
+ "    # convert the boundary operators to sparse for a matrix multiplication speedup\n",
+ "    d1_sparse = csr_matrix(generate_boundary_operator(simplex_tree, 1))\n",
+ "    d2_sparse = csr_matrix(generate_boundary_operator(simplex_tree, 2))\n",
+ "\n",
+ "    # k = 1 combinatorial Laplacian\n",
+ "    laplacian = (d1_sparse.conj().T @ d1_sparse + d2_sparse @ d2_sparse.conj().T).toarray().astype(float)\n",
+ "    padding_bool, size = need_padding(laplacian)\n",
+ "\n",
+ "    if padding_bool:\n",
+ "        # Gershgorin circle bound on the largest eigenvalue\n",
+ "        n = laplacian.shape[0]\n",
+ "        max_bound = -np.inf\n",
+ "        for i in range(n):\n",
+ "            center = laplacian[i, i]\n",
+ "            radius = sum(abs(laplacian[i, j]) for j in range(n) if j != i)\n",
+ "            max_bound = max(max_bound, center + radius)\n",
+ "\n",
+ "        padded_laplacian = np.pad(laplacian, (0, size - laplacian.shape[0]))\n",
+ "\n",
+ "        # fill the padded diagonal with a nonzero value so the padding does not\n",
+ "        # enlarge the kernel (and therefore does not inflate the Betti count)\n",
+ "        for i in range(laplacian.shape[0], size):\n",
+ "            padded_laplacian[i, i] = max_bound / 2\n",
+ "\n",
+ "        # scaling_factor = (2 * math.pi) / max_bound could be applied here to keep\n",
+ "        # the spectrum below 2*pi before exponentiation (see the sketch below)\n",
+ "        return padded_laplacian\n",
+ "\n",
+ "    return laplacian\n",
+ "\n",
+ "from qiskit.circuit.library import UnitaryGate\n",
+ "def convert_to_unitary(laplacian):\n",
+ "    \"\"\"\n",
+ "    Convert the Laplacian into a unitary matrix using matrix exponentiation.\n",
+ "    \"\"\"\n",
+ "    # Ensure the Laplacian is exactly symmetric (Hermitian) before exponentiation\n",
+ "    laplacian = 0.5 * (laplacian + laplacian.T)\n",
+ "\n",
+ "    # U = exp(-iH) is unitary whenever H is Hermitian\n",
+ "    iH = -1j * laplacian\n",
+ "    U = scipy.linalg.expm(iH)\n",
+ "\n",
+ "    # Check unitarity\n",
+ "    identity_check = np.allclose(U @ U.conj().T, np.eye(U.shape[0]), atol=1e-8)\n",
+ "    if not identity_check:\n",
+ "        raise ValueError(\"Error: Resulting matrix is not unitary\")\n",
+ "\n",
+ "    print(\"Unitary Matrix:\\n\", U)  # Debugging Output\n",
+ "    print(\"Is Unitary:\", identity_check)  # Should be True\n",
+ "\n",
+ "    return UnitaryGate(U)\n",
+ "\n",
+ "I = build_laplacian(simplex_tree)\n",
+ "H = convert_to_unitary(I)\n",
+ "print(H)"
 ]
 },
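 {
 "cell_type": "code",
 "execution_count": null,
 "id": "spectrum-scaling-sketch-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission), expanding on the\n",
 "# scaling_factor left commented out above: exp(-iH) only resolves eigenvalues\n",
 "# modulo 2*pi, so any eigenvalue of the padded Laplacian at or above 2*pi\n",
 "# aliases onto a small phase and could be mistaken for a kernel (Betti)\n",
 "# eigenvalue. Rescaling keeps the spectrum inside [0, 2*pi) while leaving the\n",
 "# zero eigenvalues, the ones we count, exactly at zero. The 0.99 margin is an\n",
 "# arbitrary safety factor.\n",
 "lam = np.linalg.eigvalsh(I)\n",
 "print(\"spectrum before scaling:\", lam.min(), lam.max())\n",
 "scale = (2 * np.pi * 0.99) / lam.max()\n",
 "print(\"largest eigenvalue after scaling:\", (scale * lam).max())\n"
 ]
 },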
@@ -502,9 +758,55 @@
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "848ef40c",
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AB\n",
+ "0\n"
+ ]
+ }
+ ],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "from qiskit import QuantumCircuit\n",
+ "from qiskit.quantum_info import Statevector\n",
+ "from qiskit_aer import Aer\n",
+ "from qiskit.circuit.library import PhaseEstimation\n",
+ "\n",
+ "def run_qpe(H, qpe_qubits=3):\n",
+ "    \"\"\"\n",
+ "    Runs QPE to estimate eigenvalues of H.\n",
+ "\n",
+ "    Args:\n",
+ "        H: Unitary gate implementing exp(-i * Laplacian).\n",
+ "        qpe_qubits: Number of phase estimation qubits.\n",
+ "\n",
+ "    Returns:\n",
+ "        Probability of measuring all-zero evaluation qubits, p(0).\n",
+ "    \"\"\"\n",
+ "    # PhaseEstimation expects the unitary as a circuit, so wrap the gate\n",
+ "    unitary_circuit = QuantumCircuit(H.num_qubits)\n",
+ "    unitary_circuit.append(H, range(H.num_qubits))\n",
+ "\n",
+ "    # Build the QPE circuit (PhaseEstimation is itself a QuantumCircuit)\n",
+ "    circuit = PhaseEstimation(num_evaluation_qubits=qpe_qubits, unitary=unitary_circuit)\n",
+ "\n",
+ "    # Simulate\n",
+ "    simulator = Aer.get_backend('statevector_simulator')\n",
+ "    result = simulator.run(circuit).result()\n",
+ "    statevector = Statevector(result.get_statevector())\n",
+ "\n",
+ "    # Marginal probability that the evaluation qubits (the first qpe_qubits\n",
+ "    # in the register) all read zero\n",
+ "    p_zero = statevector.probabilities(qargs=list(range(qpe_qubits)))[0]\n",
+ "\n",
+ "    return p_zero\n",
+ "\n",
+ "# Example usage:\n",
+ "p_zero = run_qpe(H)\n",
+ "betti_estimate = int(round((2 ** 3) * p_zero))  # beta_k = 2^q * p(0)"
 ]
 },
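 {
 "cell_type": "code",
 "execution_count": null,
 "id": "classical-betti-cross-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): beta_k equals the\n",
 "# dimension of the kernel of the k-th combinatorial Laplacian, so on an\n",
 "# instance this small the QPE estimate above can be cross-checked by counting\n",
 "# (near-)zero eigenvalues directly. The padded diagonal entries are nonzero,\n",
 "# so the padding adds no kernel vectors; the 1e-8 tolerance is an assumption.\n",
 "eigs = np.linalg.eigvalsh(build_laplacian(simplex_tree))\n",
 "beta_1_classical = int(np.sum(np.isclose(eigs, 0.0, atol=1e-8)))\n",
 "print(\"classical beta_1 (null-space dimension):\", beta_1_classical)\n"
 ]
 },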
@@ -560,7 +862,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
 "id": "5ef68f98",
 "metadata": {},
 "outputs": [],
 "source": [
 "# write your code here"
 ]
 },
@@ -583,6 +885,204 @@
 "# write your code here"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "665df20a-1f52-415b-ac9b-4483cae32836",
 "metadata": {},
 "outputs": [],
 "source": [
 "from scipy.stats import genextreme\n",
 "\n",
 "# placeholder Betti numbers (substitute the estimates from the pipeline above)\n",
 "betti_numbers = np.array([12, 14, 13, 15, 16, 14, 10, 5, 3])\n",
 "\n",
 "# first differences D_i = x_i - x_{i-1}\n",
 "D = np.diff(betti_numbers)\n",
 "\n",
 "print(\"Differences D_i:\", D)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "d6252f18-1fb7-41ef-8e06-2483c51ba22a",
 "metadata": {},
 "outputs": [],
 "source": [
 "def newey_west_variance(D, max_lag=None):\n",
 "    \"\"\"\n",
 "    The differences D_i are NOT independent, so the ordinary i.i.d. variance\n",
 "    estimator understates the uncertainty; the Newey-West estimator corrects\n",
 "    the variance for autocorrelation in the sequence.\n",
 "\n",
 "    Parameters:\n",
 "        D: array of differences.\n",
 "        max_lag (int): Maximum lag order.\n",
 "\n",
 "    Returns:\n",
 "        float: Newey-West standard deviation\n",
 "    \"\"\"\n",
 "    if max_lag is None:\n",
 "        max_lag = int(4 * (len(D) / 100) ** (2 / 9))  # Rule-of-thumb bandwidth\n",
 "\n",
 "    # Compute sample mean\n",
 "    D_mean = np.mean(D)\n",
 "\n",
 "    # Compute autocovariances\n",
 "    n = len(D)\n",
 "    gamma = np.array([\n",
 "        np.sum((D[:n - j] - D_mean) * (D[j:] - D_mean)) / n\n",
 "        for j in range(max_lag + 1)\n",
 "    ])\n",
 "\n",
 "    # Bartlett weights for Newey-West estimator\n",
 "    weights = 1 - np.arange(1, max_lag + 1) / (max_lag + 1)\n",
 "\n",
 "    # Newey-West variance formula\n",
 "    nw_var = gamma[0] + 2 * np.sum(weights * gamma[1:])\n",
 "\n",
 "    return np.sqrt(nw_var)  # Convert variance to standard deviation\n",
 "\n",
 "nw_std = newey_west_variance(D)\n",
 "print(\"Newey-West Adjusted Standard Deviation:\", nw_std)\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "df265c44-4a1d-468e-ba8a-3d7e90433cc1",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Two candidate thresholds: a crude two-standard-deviation cut from the raw\n",
 "# data, and the autocorrelation-robust Newey-West standard deviation, which\n",
 "# is the one actually used for the split below\n",
 "delta = 2 * np.std(D)\n",
 "\n",
 "nw_std = newey_west_variance(D)\n",
 "delta_adjusted = nw_std\n",
 "\n",
 "print(f\"this is D: {D}\")\n",
 "\n",
 "# Split data into null and alternative distributions\n",
 "D_H0 = D[D >= -delta_adjusted]\n",
 "D_HA = D[D < -delta_adjusted]\n",
 "\n",
 "\n",
 "print(f\"this is delta: {delta}, this is Newey-West SD: {nw_std}, this is the adjusted threshold: {delta_adjusted}, this is the null hypothesis data: {D_H0}, this is the alternative data: {D_HA}\")\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "df2b486c-eefa-49fb-a253-f6d4dabaf8d8",
 "metadata": {},
 "outputs": [],
 "source": [
 "# fit a Fréchet distribution: negate the data, fit a GEV, then flip the\n",
 "# shape parameter back\n",
 "def fit_frechet(data):\n",
 "    shape, loc, scale = genextreme.fit(-data)\n",
 "    return -shape, loc, scale\n",
 "\n",
 "params_H0 = fit_frechet(D_H0)\n",
 "params_HA = fit_frechet(D_HA)\n",
 "\n",
 "print(\"H0 Parameters (shape, loc, scale):\", params_H0)\n",
 "print(\"HA Parameters (shape, loc, scale):\", params_HA)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "cd1c5f2f-530f-4e8a-8c61-e17ab1c979df",
 "metadata": {},
 "outputs": [],
 "source": [
 "# this is the function that computes our log-likelihood ratio\n",
 "def log_likelihood_ratio(data, params_H0, params_HA):\n",
 "    shape_H0, loc_H0, scale_H0 = params_H0\n",
 "    shape_HA, loc_HA, scale_HA = params_HA\n",
 "\n",
 "    logpdf_H0 = genextreme.logpdf(-data, -shape_H0, loc=loc_H0, scale=scale_H0)\n",
 "    logpdf_HA = genextreme.logpdf(-data, -shape_HA, loc=loc_HA, scale=scale_HA)\n",
 "\n",
 "    print(f\"logpdf_H0: {logpdf_H0}\")\n",
 "    print(f\"logpdf_HA: {logpdf_HA}\")\n",
 "\n",
 "    if np.any(np.isnan(logpdf_H0)) or np.any(np.isnan(logpdf_HA)):\n",
 "        print(\"WARNING: NaN values found in logpdf!\")\n",
 "    if np.any(np.isinf(logpdf_H0)) or np.any(np.isinf(logpdf_HA)):\n",
 "        print(\"WARNING: Inf values found in logpdf!\")\n",
 "\n",
 "    log_likelihood_H0 = np.sum(logpdf_H0)\n",
 "    log_likelihood_HA = np.sum(logpdf_HA)\n",
 "\n",
 "    print(f\"log_likelihood_H0: {log_likelihood_H0}\")\n",
 "    print(f\"log_likelihood_HA: {log_likelihood_HA}\")\n",
 "\n",
 "    return log_likelihood_HA - log_likelihood_H0"
 ]
 },
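 {
 "cell_type": "code",
 "execution_count": null,
 "id": "frechet-fit-sanity-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): a quick check of the\n",
 "# sign conventions in fit_frechet. We draw from a negated GEV with known\n",
 "# parameters and refit; the recovered values should sit near the generating\n",
 "# ones. The sample size and seed are arbitrary choices.\n",
 "synthetic = -genextreme.rvs(-0.5, loc=0.0, scale=1.0, size=5000, random_state=0)\n",
 "print(\"generating (shape, loc, scale): (0.5, 0.0, 1.0)\")\n",
 "print(\"recovered:\", fit_frechet(synthetic))\n"
 ]
 },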
"execution_count": null, + "id": "289d3436-1728-44e9-9a5d-98e88bf5ef26", + "metadata": {}, + "outputs": [], + "source": [ + "#this is where we do our Monte Carlo simulation\n", + "def simulate_frechet(n, shape, loc, scale, num_simulations=1000):\n", + " return -genextreme.rvs(-shape, loc=loc, scale=scale, size=(num_simulations, n))\n", + "\n", + "# Simulate under H0\n", + "simulated_D = simulate_frechet(len(D), *params_H0, num_simulations=1000)\n", + "\n", + "# Compute log-likelihood ratios for each simulated sequence\n", + "simulated_LLRs = np.array([log_likelihood_ratio(seq, params_H0, params_HA) for seq in simulated_D])\n", + "\n", + "# 1st percentile for critical value\n", + "c = np.percentile(simulated_LLRs, 1)\n", + "\n", + "print(\"Critical value c:\", c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c25ef484-3b27-4313-b59a-85fb7a0d7d76", + "metadata": {}, + "outputs": [], + "source": [ + "def sequential_likelihood_ratio_test(D, params_H0, params_HA, c):\n", + " \"\"\"\n", + " we now run the sequential likelihood ratio, checking each x_i wrt x_i-1\n", + " we accept sequentially, but if we ever find one x_i such that we are below the\n", + " threshold, we immediately reject, as this signals a crash in the market\n", + " \"\"\"\n", + " shape_H0, loc_H0, scale_H0 = params_H0\n", + " shape_HA, loc_HA, scale_HA = params_HA\n", + "\n", + " for d in D:\n", + " llr = genextreme.logpdf(-d, -shape_HA, loc=loc_HA, scale=scale_HA) - \\\n", + " genextreme.logpdf(-d, -shape_H0, loc=loc_H0, scale=scale_H0)\n", + "\n", + " if llr < c:\n", + " return True # Crash detected\n", + "\n", + " return False # No crash detected\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2059cf3e-0c57-42a0-af58-5576effc09e6", + "metadata": {}, + "outputs": [], + "source": [ + "crash_happened = sequential_likelihood_ratio_test(D, params_H0, params_HA, c)\n", + "print(\"Crash detected:\", crash_happened)" + ] + }, { "cell_type": "markdown", "id": "d1c2e4b5", @@ -617,6 +1117,97 @@ " Implement a quantum TDA algorithm for persistent Betti numbers. Esstimating the persistent Betti numbers is a more general task than estimating the Betti number and it is more practical for TDA. It is an open problem to construct a quantum algorithm for the persistent Betti numbers in a way that is preferable for NISQ devices, and the only current implementation of a quantum algorithm for persistent betti number is shown [here](https://quantum-journal.org/papers/q-2022-12-07-873/pdf/)." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "d278e7af-b0c9-4db2-bd1e-3916f644a0bb", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Answer to the Bonus question: Future Directions for Quantum TDA: A Hybrid NISQ-Friendly Pipeline \n", + "To address the limitations of Quantum Phase Estimation (QPE) in practical quantum TDA pipelines, we propose replacing QPE with variational algorithms such as the **Variational Quantum Eigensolver (VQE)** and **Variational Quantum Deflation (VQD)**. These methods are better suited for NISQ devices due to their shorter circuit depths and hybrid quantum-classical workflows. Below is a structured approach to building a hybrid pipeline and benchmarking its performance.\n", + "\n", + "---\n", + "\n", + "## 1. 
Proposed Hybrid Pipeline \n", + "### Step 1: Hamiltonian Encoding \n", + "- **Combinatorial Laplacian as a Hamiltonian**: \n", + " Represent the combinatorial Laplacian \\(\\Delta_k\\) (used to compute Betti numbers) as a sum of Pauli operators. This involves decomposing \\(\\Delta_k\\) into tensor products of Pauli matrices (\\(X, Y, Z, I\\)), similar to the adjacency/Laplacian matrix encoding in the provided paper. \n", + " - Example: For a simplicial complex with \\(n\\) simplices, pad \\(\\Delta_k\\) to size \\(2^{\\lceil \\log_2 n \\rceil} \\times 2^{\\lceil \\log_2 n \\rceil}\\) and map it to qubit operators.\n", + "\n", + "### Step 2: Ansatz Design \n", + "- **Adaptive Ansatz for TDA**: \n", + " Use a parameterized quantum circuit (ansatz) tailored for graph/simplicial complex structures. Examples include: \n", + " - **Layered Hardware-Efficient Ansatz**: Alternating layers of single-qubit rotations (\\(R_X, R_Z\\)) and entangling gates (e.g., CNOT or CZ). \n", + " - **Graph-Inspired Ansatz**: Incorporate connectivity patterns from the simplicial complex into the ansatz topology. \n", + " - **Symmetry-Preserving Ansatz**: Exploit the symmetry of \\(\\Delta_k\\) (e.g., sparsity or block structure) to reduce parameter counts.\n", + "\n", + "### Step 3: Optimization Loop \n", + "- **VQE for Ground State**: \n", + " Minimize \\(\\langle \\psi(\\theta) | \\Delta_k | \\psi(\\theta) \\rangle\\) to estimate the smallest eigenvalue (closest to zero). \n", + "- **VQD for Excited States**: \n", + " Iteratively deflate \\(\\Delta_k\\) to estimate higher eigenvalues: \n", + " 1. After finding the ground state \\(|\\psi_0\\rangle\\), modify \\(\\Delta_k \\rightarrow \\Delta_k - \\lambda_0 |\\psi_0\\rangle\\langle\\psi_0|\\). \n", + " 2. Repeat VQE on the deflated Hamiltonian to find the next eigenvalue. \n", + "\n", + "### Step 4: Noise Mitigation \n", + "- **Error Suppression**: \n", + " Use techniques like zero-noise extrapolation (ZNE), dynamical decoupling, or measurement error mitigation to improve eigenvalue estimates. \n", + "- **Noise-Adaptive Ansatz**: \n", + " Design ansätze with reduced gate counts or tailored to device noise profiles (e.g., avoiding fragile two-qubit gates).\n", + "\n", + "---\n", + "\n", + "## 2. Benchmarking Framework \n", + "Compare the hybrid pipeline against textbook QPE across key metrics: \n", + "\n", + "| **Metric** | **VQE/VQD** | **QPE** | \n", + "|-------------------------|---------------------------------------|---------------------------------------| \n", + "| Circuit Depth | Shallow (polynomial in \\(n\\)) | Deep (exponential in precision \\(q\\)) | \n", + "| Qubit Count | \\(O(\\log n)\\) | \\(O(\\log n + q)\\) | \n", + "| Classical Cost | Iterative optimization (\\(O(\\text{poly}(n))\\)) | Post-processing (\\(O(2^q)\\)) | \n", + "| Noise Resilience | Robust (short circuits) | Fragile (long coherence required) | \n", + "| Eigenvalue Accuracy | Approximate (\\(\\pm \\epsilon\\)) | Exact (up to precision \\(q\\)) | \n", + "\n", + "### Key Experiments \n", + "1. **Simulation**: \n", + " - Use quantum simulators (e.g., Qiskit Aer) to compare accuracy and convergence rates for small simplicial complexes (\\(n \\leq 10\\)). \n", + "2. **Hardware Runs**: \n", + " - Deploy on NISQ devices (e.g., IBMQ, Rigetti) to test noise resilience and runtime. \n", + "3. **Scaling Tests**: \n", + " - Measure runtime and error growth as \\(n\\) increases, comparing empirical results to theoretical predictions (e.g., \\(O(n^2)\\) vs. \\(O(2^n)\\)). 
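\n",
 "\n",
 "**Editor's addition (hedged sketch):** the deflation in Step 3 can be prototyped classically before any quantum resources are involved. The toy matrix, the shift size, and the exact eigensolver below are illustrative assumptions; VQD itself realizes the same idea through overlap penalties measured on hardware.\n",
 "\n",
 "```python\n",
 "import numpy as np\n",
 "\n",
 "H_toy = np.diag([0.0, 1.0, 2.0, 3.0])  # stand-in for a small combinatorial Laplacian\n",
 "w, v = np.linalg.eigh(H_toy)\n",
 "lam0, psi0 = w[0], v[:, 0]  # the 'VQE' ground pair\n",
 "\n",
 "# deflate: push the found eigenstate above the rest of the spectrum, so the\n",
 "# next minimization returns the second eigenvalue of the original matrix\n",
 "shift = (w[-1] - w[0]) + 1.0\n",
 "H_deflated = H_toy + shift * np.outer(psi0, psi0)\n",
 "lam1 = np.linalg.eigh(H_deflated)[0][0]\n",
 "print(lam0, lam1)  # 0.0 and 1.0 for this toy spectrum\n",
 "```\n",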
\n", + "\n", + "---\n", + "\n", + "## 3. Advantages and Challenges \n", + "### Advantages \n", + "- **NISQ Compatibility**: Shallow circuits reduce sensitivity to decoherence. \n", + "- **Flexibility**: Ansatz and optimizer can be tailored to specific problem structures. \n", + "- **Cost-Efficiency**: Classical optimization leverages existing HPC resources. \n", + "\n", + "### Challenges \n", + "- **Ansatz Expressivity**: Poorly chosen ansätze may fail to capture eigenstates. \n", + "- **Optimization Traps**: Classical optimizers (e.g., COBYLA, SPSA) may converge to local minima. \n", + "- **Noise Propagation**: Errors in eigenvalue estimates compound during deflation (VQD). \n", + "\n", + "---\n", + "\n", + "## 4. Implementation Roadmap \n", + "1. **Codebase Integration**: \n", + " - Extend existing quantum TDA libraries (e.g., Qiskit, Pennylane) to support combinatorial Laplacians and variational algorithms. \n", + "2. **Hardware Experiments**: \n", + " - Run on IBMQ or Rigetti devices for \\(n=4\\)–\\(8\\) simplices, comparing Betti number estimates to classical TDA tools (e.g., GUDHI). \n", + "3. **Algorithm Refinement**: \n", + " - Develop automated ansatz design tools (e.g., genetic algorithms) and noise-adaptive optimizers. \n", + "\n", + "---\n", + "\n", + "## Conclusion \n", + "By replacing QPE with VQE/VQD, we create a **NISQ-friendly quantum TDA pipeline** that balances accuracy and practicality. While variational methods introduce approximations, their compatibility with near-term hardware enables real-world applications in finance, biology, and materials science. Future work should focus on optimizing ansatz design and integrating error mitigation to achieve quantum advantage in topological data analysis. \n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -624,7 +1215,49 @@ "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "# write your code here\n", + "\n", + "!pip install qiskit-terra # Install qiskit-terra\n", + "!pip install qiskit[visualization] # Install visualization tools, if you need them\n", + "\n", + "# Restart runtime here (Runtime -> Restart runtime)\n", + "\n", + "import qiskit.algorithms\n", + "print(qiskit.algorithms.__version__) # Check version. Should be 0.24.0 or higher\n", + "\n", + "from qiskit.algorithms import VQE\n", + "from qiskit.algorithms.optimizers import COBYLA\n", + "from qiskit.circuit.library import TwoLocal\n", + "from qiskit.opflow import PauliSumOp # Or SparsePauliOp\n", + "from qiskit.quantum_info import SparsePauliOp\n", + "from qiskit.primitives import Sampler\n", + "from qiskit import Aer\n", + "\n", + "# ... 
+ "\n",
+ "# ... (rest of the VQE code as before, with the placeholder Laplacian below)\n",
+ "\n",
+ "# Example Laplacian (replace with your actual Laplacian):\n",
+ "pauli_list = [\n",
+ "    (\"ZZII\", 0.25),\n",
+ "    (\"IIZZ\", 0.25),\n",
+ "    (\"ZIZI\", -0.1),\n",
+ "    (\"IXZX\", -0.1),\n",
+ "]\n",
+ "pauli_laplacian = SparsePauliOp.from_list(pauli_list)\n",
+ "\n",
+ "\n",
+ "num_qubits = 4\n",
+ "ansatz = TwoLocal(num_qubits=num_qubits, rotation_blocks=[\"rx\", \"rz\"], entanglement_blocks=\"cz\", reps=2)\n",
+ "optimizer = COBYLA(maxiter=100)\n",
+ "\n",
+ "estimator = Estimator()\n",
+ "\n",
+ "vqe = VQE(estimator=estimator, ansatz=ansatz, optimizer=optimizer)\n",
+ "\n",
+ "result = vqe.compute_minimum_eigenvalue(pauli_laplacian)\n",
+ "\n",
+ "print(\"Estimated smallest eigenvalue:\", result.eigenvalue)\n",
+ "print(\"Optimal parameters:\", result.optimal_parameters)"
 ]
 },
 {
@@ -638,9 +1271,9 @@
 ],
 "metadata": {
 "kernelspec": {
- "display_name": "tda",
+ "display_name": "Python 3 [moodys]",
 "language": "python",
- "name": "python3"
+ "name": "python3_moodys_yhl12u"
 },
 "language_info": {
 "codemirror_mode": {
@@ -652,7 +1285,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.11.9"
 }
 },
 "nbformat": 4,