From f9a834debc58f4036669b802ef65e88eab52de37 Mon Sep 17 00:00:00 2001
From: alexhold5 <87720137+alexhold5@users.noreply.github.com>
Date: Sun, 2 Feb 2025 10:23:32 -0500
Subject: [PATCH] Add files via upload
---
moodys_challenge.ipynb | 695 +++++++++++++++++++++++++++++++++++++++--
1 file changed, 664 insertions(+), 31 deletions(-)
diff --git a/moodys_challenge.ipynb b/moodys_challenge.ipynb
index 7ee0e87..91c9f70 100644
--- a/moodys_challenge.ipynb
+++ b/moodys_challenge.ipynb
@@ -161,15 +161,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"id": "4ec880cd",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "done\n"
+ ]
+ }
+ ],
"source": [
+ "#testing\n",
+ "\n",
"import pandas as pd\n",
"import numpy as np\n",
"\n",
- "time_series = np.log(pd.read_csv(\"SP500.csv\", header=None).to_numpy().squeeze())"
+ "time_series = np.log(pd.read_csv(\"sp500.csv\", header=None).to_numpy().squeeze())"
]
},
{
@@ -209,36 +221,93 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"id": "7b701be6",
"metadata": {},
"outputs": [],
"source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "\n",
+ "def takens_embedding(time_series, N, d):\n",
+ " \"\"\"\n",
+ " We are using Taken's embedding theorem, which states that we can\n",
+ " reconstruct the topology and dynamical behavior of the global system\n",
+ " via a vector of dimension d of the time delayed data points\n",
+ "\n",
+ " \"\"\"\n",
+ " L = len(time_series)\n",
+ " if L < (N - 1) * d:\n",
+ " raise ValueError(\"Time series is too short for given N and d.\")\n",
+ " # list comprehension to create the time series\n",
+ " return np.array([time_series[i : i + (N - 1) * d + 1 : d] for i in range(L - (N - 1) * d)])\n",
+ "\n",
+ "\n"
]
},
{
- "cell_type": "markdown",
- "id": "9376bd0b",
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "e0140903-54bb-42bd-9df1-ace45517b265",
"metadata": {},
+ "outputs": [],
"source": [
- "
NOTE:\n",
+ "
\n",
+ " \n",
+ "BONUS EXERCISE: \n",
"\n",
- "From step 2, we present an example originally provided in the appendix of [Khandelwal's and Chandra's paper](https://arxiv.org/abs/2302.09553). This example can be used to verify your code.\n",
+ "`gtda.time_series.TakensEmbedding` can conduct this transformation. Try avoid using this function and build your own embedding function.\n",
"\n",
"
"
]
@@ -273,12 +342,68 @@
},
{
"cell_type": "code",
- "execution_count": null,
- "id": "026d6b61",
+ "execution_count": 5,
+ "id": "efcda15b-3685-431a-91de-cb1929743103",
"metadata": {},
"outputs": [],
"source": [
- "# write your code here"
+ "def sort_simplices(input_list):\n",
+ " \"\"\"\n",
+ " Organizes a list of simplices into groups based on dimension.\n",
+ " \n",
+ " Args:\n",
+ " - input_list (list of lists): A list containing simplices of different dimensions.\n",
+ " \n",
+ " Returns:\n",
+ " - A nested list: [[0-simplices], [1-simplices], [2-simplices], ...]\n",
+ " \"\"\"\n",
+ " # Sort elements into categories based on length\n",
+ " sorted_simplices = {}\n",
+ " \n",
+ " for simplex in input_list:\n",
+ " key = len(simplex) - 1 # Dimension of the simplex (0-based)\n",
+ " simplex_sorted = sorted(simplex) # Ensure order consistency\n",
+ " \n",
+ " if key not in sorted_simplices:\n",
+ " sorted_simplices[key] = []\n",
+ " \n",
+ " sorted_simplices[key].append(simplex_sorted)\n",
+ "\n",
+ " # Convert the dictionary into a list of lists, ensuring correct order\n",
+ " return [sorted_simplices.get(dim, []) for dim in sorted(sorted_simplices.keys())]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "026d6b61",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "done\n"
+ ]
+ }
+ ],
+ "source": [
+ "# write your code here\n",
+ "import gudhi\n",
+ "\n",
+ "point_cloud = point_cloud.reshape(point_cloud.shape[0], -1)\n",
+ "\n",
+ "epsilon = 12\n",
+ "rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length = epsilon)\n",
+ "unfilt_simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)\n",
+ "filt_simplex_tree = list(unfilt_simplex_tree.get_filtration())\n",
+ "simplex_tree = []\n",
+ "for filtered_value in filt_simplex_tree:\n",
+ " simplex_tree.append(filtered_value[0])\n",
+ "\n",
+ "simplex_tree = sort_simplices(simplex_tree)"
]
},
{
@@ -337,12 +462,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"id": "246e41ce",
"metadata": {},
"outputs": [],
"source": [
- "# write your code here"
+ "# write your code here\n",
+ "import numpy as np\n",
+ "def generate_boundary_operator(simplex_tree, k):\n",
+ " num_k_simplicies = len(simplex_tree[k])\n",
+ " num_k1_simplicies = len(simplex_tree[k-1])\n",
+ " op = np.zeros((num_k_simplicies, num_k1_simplicies), dtype=int)\n",
+ " for i in range(num_k_simplicies):\n",
+ " simplex = simplex_tree[k][i]\n",
+ " row = [0] * num_k1_simplicies\n",
+ " sub_simplicies = [simplex[:j] + simplex[j + 1:] for j in range(len(simplex))]\n",
+ " sub_simplicies.sort()\n",
+ " for z,sub in enumerate(sub_simplicies):\n",
+ " index = simplex_tree[k-1].index(sub)\n",
+ " op[i,index] = (-1)**z\n",
+ "\n",
+ " return op.T"
]
},
{
@@ -423,12 +563,128 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"id": "8f818f78",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unitary Matrix:\n",
+ " [[-0.03097503-0.99952016j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j -0.03097503-0.99952016j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j -0.03097503-0.99952016j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " ...\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0.95507364-0.29636858j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0.95507364-0.29636858j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0.95507364-0.29636858j]]\n",
+ "Is Unitary: True\n",
+ "Instruction(name='unitary', num_qubits=12, num_clbits=0, params=[array([[-0.03097503-0.99952016j, 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , -0.03097503-0.99952016j,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " -0.03097503-0.99952016j, ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " ...,\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0.95507364-0.29636858j,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0.95507364-0.29636858j, 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0.95507364-0.29636858j]])])\n"
+ ]
+ }
+ ],
"source": [
- "# write your code here"
+ "# write your code here\n",
+ "import numpy as np\n",
+ "from scipy.sparse import csr_matrix\n",
+ "import scipy as scipy\n",
+ "def need_padding(laplacian: np.ndarray):\n",
+ " size = laplacian.shape[0]\n",
+ " if (size & (size - 1)) == 0:\n",
+ " return False, size\n",
+ "\n",
+ " next_power_of_2 = 2**(size-1).bit_length()\n",
+ " return True, next_power_of_2\n",
+ "\n",
+ "def build_laplacian(simplex_tree):\n",
+ " d1 = generate_boundary_operator(simplex_tree, 1)\n",
+ " d1_sparse = csr_matrix(d1)\n",
+ " d2 = generate_boundary_operator(simplex_tree, 2)\n",
+ " d2_sparse = csr_matrix(d2)\n",
+ " # convert to sparse for matrix multiplication speedup\n",
+ "\n",
+ " # laplacian = np.add(np.matmul((d1.T), d1), np.matmul(d2, (d2.T)))\n",
+ " # np.reshape(laplacian, (6,6))\n",
+ " # padding_bool, size = need_padding(laplacian)\n",
+ "\n",
+ " laplacian = d1_sparse.conj().T @ d1_sparse + d2_sparse @ d2_sparse.conj().T\n",
+ " laplacian = laplacian.toarray()\n",
+ " # laplacian = np.reshape(laplacian, (6, 6)) # Only reshape if you need it\n",
+ " padding_bool, size = need_padding(laplacian)\n",
+ "\n",
+ " if padding_bool:\n",
+ " n = laplacian.shape[0]\n",
+ " max_bound = -np.inf\n",
+ " for i in range(n):\n",
+ " center = laplacian[i, i]\n",
+ " radius = sum(abs(laplacian[i, j]) for j in range(n) if j != i)\n",
+ " max_bound = max(max_bound, center + radius)\n",
+ " \n",
+ " padded_laplacian = np.pad(laplacian, (0, size - laplacian.shape[0]))\n",
+ "\n",
+ " for i in range(laplacian.shape[0], size): padded_laplacian[i,i] = max_bound/2\n",
+ " \n",
+ " # scaling_factor = (2*math.pi) // max_bound\n",
+ " # padded_laplacian = scaling_factor * padded_laplacian\n",
+ " \n",
+ " return padded_laplacian\n",
+ "\n",
+ "from qiskit.circuit.library import UnitaryGate\n",
+ "def convert_to_unitary(laplacian):\n",
+ " \"\"\"\n",
+ " Convert the Laplacian into a unitary matrix using matrix exponentiation.\n",
+ " \"\"\"\n",
+ " # Ensure Laplacian is Hermitian before exponentiation\n",
+ " laplacian = 0.5 * (laplacian + laplacian.T)\n",
+ "\n",
+ " # Compute matrix exponential\n",
+ " iH = -1j * laplacian\n",
+ " U = scipy.linalg.expm(iH)\n",
+ "\n",
+ " # Check unitarity\n",
+ " identity_check = np.allclose(U @ U.conj().T, np.eye(U.shape[0]), atol=1e-8)\n",
+ " if not identity_check:\n",
+ " raise ValueError(\"Error: Resulting matrix is not unitary\")\n",
+ "\n",
+ " print(\"Unitary Matrix:\\n\", U) # Debugging Output\n",
+ " print(\"Is Unitary:\", identity_check) # Should be True\n",
+ "\n",
+ " return UnitaryGate(U)\n",
+ "\n",
+ "I = build_laplacian(simplex_tree)\n",
+ "H = convert_to_unitary(I)\n",
+ "print(H)"
]
},
{
@@ -502,9 +758,55 @@
"execution_count": null,
"id": "848ef40c",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AB\n",
+ "0\n"
+ ]
+ }
+ ],
"source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "from qiskit import QuantumCircuit, transpile\n",
+ "# from qiskit.algorithms import PhaseEstimation\n",
+ "from qiskit.circuit.library import QFT\n",
+ "from qiskit.quantum_info import Statevector\n",
+ "from qiskit_aer import Aer\n",
+ "from qiskit.circuit.library import PhaseEstimation\n",
+ "\n",
+ "def run_qpe(H, qpe_qubits=3):\n",
+ " \"\"\"\n",
+ " Runs QPE to estimate eigenvalues of H.\n",
+ "\n",
+ " Args:\n",
+ " H: Unitary matrix.\n",
+ " qpe_qubits: Number of phase estimation qubits.\n",
+ "\n",
+ " Returns:\n",
+ " Probability of measuring all-zero states (p(0)).\n",
+ " \"\"\"\n",
+ " # Build QPE circuit\n",
+ " qpe = PhaseEstimation(num_evaluation_qubits=qpe_qubits, unitary=H)\n",
+ " circuit = qpe.circuit\n",
+ "\n",
+ " # Simulate\n",
+ " simulator = Aer.get_backend('statevector_simulator')\n",
+ " result = simulator.run(circuit).result()\n",
+ " statevector = result.get_statevector()\n",
+ " \n",
+ " # Measure probability of all-zero state\n",
+ " zero_state = '0' * qpe_qubits\n",
+ " p_zero = np.abs(statevector[Statevector.from_label(zero_state).data]) ** 2\n",
+ "\n",
+ " return p_zero\n",
+ "\n",
+ "# Example usage:\n",
+ "p_zero = run_qpe(H)\n",
+ "betti_estimate = int(round((2 ** 3) * p_zero)) # β_k = 2^q * p(0)"
]
},
{
@@ -560,7 +862,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"id": "5ef68f98",
"metadata": {},
"outputs": [],
@@ -583,6 +885,204 @@
"# write your code here"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "665df20a-1f52-415b-ac9b-4483cae32836",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import statsmodels.api as sm\n",
+ "from scipy.stats import genextreme\n",
+ "\n",
+ "# arbitrary Betti numbers\n",
+ "betti_numbers = np.array([12, 14, 13, 15, 16, 14, 10, 5, 3])\n",
+ "\n",
+ "# we are computing the differences via x_i-x_{i-1}\n",
+ "D = np.diff(betti_numbers)\n",
+ "\n",
+ "print(\"Differences D_i:\", D)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6252f18-1fb7-41ef-8e06-2483c51ba22a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def newey_west_variance(D, max_lag=None):\n",
+ " \"\"\"\n",
+ " What we have is data that is NOT independant. Therefore, we can't\n",
+ " implement normal variance and\n",
+ "\n",
+ " Parameters:\n",
+ " D\n",
+ " max_lag (int): Maximum lag order.\n",
+ "\n",
+ " Returns:\n",
+ " float: Newey-West standard deviation\n",
+ " \"\"\"\n",
+ " if max_lag is None:\n",
+ " max_lag = int(4 * (len(D) / 100) ** (2 / 9)) # Rule-of-thumb bandwidth\n",
+ "\n",
+ " # Compute sample mean\n",
+ " D_mean = np.mean(D)\n",
+ "\n",
+ " # Compute autocovariances\n",
+ " n = len(D)\n",
+ " gamma = np.array([\n",
+ " np.sum((D[:n - j] - D_mean) * (D[j:] - D_mean)) / n\n",
+ " for j in range(max_lag + 1)\n",
+ " ])\n",
+ "\n",
+ " # Bartlett weights for Newey-West estimator\n",
+ " weights = 1 - np.arange(1, max_lag + 1) / (max_lag + 1)\n",
+ "\n",
+ " # Newey-West variance formula\n",
+ " nw_var = gamma[0] + 2 * np.sum(weights * gamma[1:])\n",
+ "\n",
+ " return np.sqrt(nw_var) # Convert variance to standard deviation\n",
+ "\n",
+ "nw_std = newey_west_variance(D)\n",
+ "print(\"Newey-West Adjusted Standard Deviation:\", nw_std)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df265c44-4a1d-468e-ba8a-3d7e90433cc1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Compute delta (3 * adjusted standard deviation) (to get 99% power test)\n",
+ "#\n",
+ "delta = 2 * np.std(D)\n",
+ "\n",
+ "nw_std = newey_west_variance(D)\n",
+ "delta_adjusted = nw_std\n",
+ "\n",
+ "print(f\"this is D: {D}\")\n",
+ "\n",
+ "# Split data into null and alternative distributions\n",
+ "D_H0 = D[D >= -delta_adjusted]\n",
+ "D_HA = D[D < -delta_adjusted]\n",
+ "\n",
+ "\n",
+ "print(f\"this is delta: {delta}, this is Newey-West SD: {nw_std}, this is the adjusted {delta_adjusted}, this is the null hypothesis data: {D_H0}, this is the alternative data: {D_HA}\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "df2b486c-eefa-49fb-a253-f6d4dabaf8d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#defining our Fréchet distribution function\n",
+ "def fit_frechet(data):\n",
+ " shape, loc, scale = genextreme.fit(-data)\n",
+ " return -shape, loc, scale\n",
+ "\n",
+ "params_H0 = fit_frechet(D_H0)\n",
+ "params_HA = fit_frechet(D_HA)\n",
+ "\n",
+ "print(\"H0 Parameters (shape, loc, scale):\", params_H0)\n",
+ "print(\"HA Parameters (shape, loc, scale):\", params_HA)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cd1c5f2f-530f-4e8a-8c61-e17ab1c979df",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#this is the function that will define our log likelihood ratio\n",
+ "def log_likelihood_ratio(data, params_H0, params_HA):\n",
+ " shape_H0, loc_H0, scale_H0 = params_H0\n",
+ " shape_HA, loc_HA, scale_HA = params_HA\n",
+ "\n",
+ " logpdf_H0 = genextreme.logpdf(-data, -shape_H0, loc=loc_H0, scale=scale_H0)\n",
+ " logpdf_HA = genextreme.logpdf(-data, -shape_HA, loc=loc_HA, scale=scale_HA)\n",
+ "\n",
+ " print(f\"logpdf_H0: {logpdf_H0}\")\n",
+ " print(f\"logpdf_HA: {logpdf_HA}\")\n",
+ "\n",
+ " if np.any(np.isnan(logpdf_H0)) or np.any(np.isnan(logpdf_HA)):\n",
+ " print(\"WARNING: NaN values found in logpdf!\")\n",
+ " if np.any(np.isinf(logpdf_H0)) or np.any(np.isinf(logpdf_HA)):\n",
+ " print(\"WARNING: Inf values found in logpdf!\")\n",
+ "\n",
+ " log_likelihood_H0 = np.sum(logpdf_H0)\n",
+ " log_likelihood_HA = np.sum(logpdf_HA)\n",
+ "\n",
+ " print(f\"log_likelihood_H0: {log_likelihood_H0}\")\n",
+ " print(f\"log_likelihood_HA: {log_likelihood_HA}\")\n",
+ "\n",
+ " return log_likelihood_HA - log_likelihood_H0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "289d3436-1728-44e9-9a5d-98e88bf5ef26",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#this is where we do our Monte Carlo simulation\n",
+ "def simulate_frechet(n, shape, loc, scale, num_simulations=1000):\n",
+ " return -genextreme.rvs(-shape, loc=loc, scale=scale, size=(num_simulations, n))\n",
+ "\n",
+ "# Simulate under H0\n",
+ "simulated_D = simulate_frechet(len(D), *params_H0, num_simulations=1000)\n",
+ "\n",
+ "# Compute log-likelihood ratios for each simulated sequence\n",
+ "simulated_LLRs = np.array([log_likelihood_ratio(seq, params_H0, params_HA) for seq in simulated_D])\n",
+ "\n",
+ "# 1st percentile for critical value\n",
+ "c = np.percentile(simulated_LLRs, 1)\n",
+ "\n",
+ "print(\"Critical value c:\", c)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c25ef484-3b27-4313-b59a-85fb7a0d7d76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def sequential_likelihood_ratio_test(D, params_H0, params_HA, c):\n",
+ " \"\"\"\n",
+ " we now run the sequential likelihood ratio, checking each x_i wrt x_i-1\n",
+ " we accept sequentially, but if we ever find one x_i such that we are below the\n",
+ " threshold, we immediately reject, as this signals a crash in the market\n",
+ " \"\"\"\n",
+ " shape_H0, loc_H0, scale_H0 = params_H0\n",
+ " shape_HA, loc_HA, scale_HA = params_HA\n",
+ "\n",
+ " for d in D:\n",
+ " llr = genextreme.logpdf(-d, -shape_HA, loc=loc_HA, scale=scale_HA) - \\\n",
+ " genextreme.logpdf(-d, -shape_H0, loc=loc_H0, scale=scale_H0)\n",
+ "\n",
+ " if llr < c:\n",
+ " return True # Crash detected\n",
+ "\n",
+ " return False # No crash detected\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2059cf3e-0c57-42a0-af58-5576effc09e6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "crash_happened = sequential_likelihood_ratio_test(D, params_H0, params_HA, c)\n",
+ "print(\"Crash detected:\", crash_happened)"
+ ]
+ },
{
"cell_type": "markdown",
"id": "d1c2e4b5",
@@ -617,6 +1117,97 @@
" Implement a quantum TDA algorithm for persistent Betti numbers. Esstimating the persistent Betti numbers is a more general task than estimating the Betti number and it is more practical for TDA. It is an open problem to construct a quantum algorithm for the persistent Betti numbers in a way that is preferable for NISQ devices, and the only current implementation of a quantum algorithm for persistent betti number is shown [here](https://quantum-journal.org/papers/q-2022-12-07-873/pdf/)."
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d278e7af-b0c9-4db2-bd1e-3916f644a0bb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "# Answer to the Bonus question: Future Directions for Quantum TDA: A Hybrid NISQ-Friendly Pipeline \n",
+ "To address the limitations of Quantum Phase Estimation (QPE) in practical quantum TDA pipelines, we propose replacing QPE with variational algorithms such as the **Variational Quantum Eigensolver (VQE)** and **Variational Quantum Deflation (VQD)**. These methods are better suited for NISQ devices due to their shorter circuit depths and hybrid quantum-classical workflows. Below is a structured approach to building a hybrid pipeline and benchmarking its performance.\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 1. Proposed Hybrid Pipeline \n",
+ "### Step 1: Hamiltonian Encoding \n",
+ "- **Combinatorial Laplacian as a Hamiltonian**: \n",
+ " Represent the combinatorial Laplacian \\(\\Delta_k\\) (used to compute Betti numbers) as a sum of Pauli operators. This involves decomposing \\(\\Delta_k\\) into tensor products of Pauli matrices (\\(X, Y, Z, I\\)), similar to the adjacency/Laplacian matrix encoding in the provided paper. \n",
+ " - Example: For a simplicial complex with \\(n\\) simplices, pad \\(\\Delta_k\\) to size \\(2^{\\lceil \\log_2 n \\rceil} \\times 2^{\\lceil \\log_2 n \\rceil}\\) and map it to qubit operators.\n",
+ "\n",
+ "### Step 2: Ansatz Design \n",
+ "- **Adaptive Ansatz for TDA**: \n",
+ " Use a parameterized quantum circuit (ansatz) tailored for graph/simplicial complex structures. Examples include: \n",
+ " - **Layered Hardware-Efficient Ansatz**: Alternating layers of single-qubit rotations (\\(R_X, R_Z\\)) and entangling gates (e.g., CNOT or CZ). \n",
+ " - **Graph-Inspired Ansatz**: Incorporate connectivity patterns from the simplicial complex into the ansatz topology. \n",
+ " - **Symmetry-Preserving Ansatz**: Exploit the symmetry of \\(\\Delta_k\\) (e.g., sparsity or block structure) to reduce parameter counts.\n",
+ "\n",
+ "### Step 3: Optimization Loop \n",
+ "- **VQE for Ground State**: \n",
+ " Minimize \\(\\langle \\psi(\\theta) | \\Delta_k | \\psi(\\theta) \\rangle\\) to estimate the smallest eigenvalue (closest to zero). \n",
+ "- **VQD for Excited States**: \n",
+ " Iteratively deflate \\(\\Delta_k\\) to estimate higher eigenvalues: \n",
+ " 1. After finding the ground state \\(|\\psi_0\\rangle\\), modify \\(\\Delta_k \\rightarrow \\Delta_k - \\lambda_0 |\\psi_0\\rangle\\langle\\psi_0|\\). \n",
+ " 2. Repeat VQE on the deflated Hamiltonian to find the next eigenvalue. \n",
+ "\n",
+ "### Step 4: Noise Mitigation \n",
+ "- **Error Suppression**: \n",
+ " Use techniques like zero-noise extrapolation (ZNE), dynamical decoupling, or measurement error mitigation to improve eigenvalue estimates. \n",
+ "- **Noise-Adaptive Ansatz**: \n",
+ " Design ansätze with reduced gate counts or tailored to device noise profiles (e.g., avoiding fragile two-qubit gates).\n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 2. Benchmarking Framework \n",
+ "Compare the hybrid pipeline against textbook QPE across key metrics: \n",
+ "\n",
+ "| **Metric** | **VQE/VQD** | **QPE** | \n",
+ "|-------------------------|---------------------------------------|---------------------------------------| \n",
+ "| Circuit Depth | Shallow (polynomial in \\(n\\)) | Deep (exponential in precision \\(q\\)) | \n",
+ "| Qubit Count | \\(O(\\log n)\\) | \\(O(\\log n + q)\\) | \n",
+ "| Classical Cost | Iterative optimization (\\(O(\\text{poly}(n))\\)) | Post-processing (\\(O(2^q)\\)) | \n",
+ "| Noise Resilience | Robust (short circuits) | Fragile (long coherence required) | \n",
+ "| Eigenvalue Accuracy | Approximate (\\(\\pm \\epsilon\\)) | Exact (up to precision \\(q\\)) | \n",
+ "\n",
+ "### Key Experiments \n",
+ "1. **Simulation**: \n",
+ " - Use quantum simulators (e.g., Qiskit Aer) to compare accuracy and convergence rates for small simplicial complexes (\\(n \\leq 10\\)). \n",
+ "2. **Hardware Runs**: \n",
+ " - Deploy on NISQ devices (e.g., IBMQ, Rigetti) to test noise resilience and runtime. \n",
+ "3. **Scaling Tests**: \n",
+ " - Measure runtime and error growth as \\(n\\) increases, comparing empirical results to theoretical predictions (e.g., \\(O(n^2)\\) vs. \\(O(2^n)\\)). \n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 3. Advantages and Challenges \n",
+ "### Advantages \n",
+ "- **NISQ Compatibility**: Shallow circuits reduce sensitivity to decoherence. \n",
+ "- **Flexibility**: Ansatz and optimizer can be tailored to specific problem structures. \n",
+ "- **Cost-Efficiency**: Classical optimization leverages existing HPC resources. \n",
+ "\n",
+ "### Challenges \n",
+ "- **Ansatz Expressivity**: Poorly chosen ansätze may fail to capture eigenstates. \n",
+ "- **Optimization Traps**: Classical optimizers (e.g., COBYLA, SPSA) may converge to local minima. \n",
+ "- **Noise Propagation**: Errors in eigenvalue estimates compound during deflation (VQD). \n",
+ "\n",
+ "---\n",
+ "\n",
+ "## 4. Implementation Roadmap \n",
+ "1. **Codebase Integration**: \n",
+ " - Extend existing quantum TDA libraries (e.g., Qiskit, Pennylane) to support combinatorial Laplacians and variational algorithms. \n",
+ "2. **Hardware Experiments**: \n",
+ " - Run on IBMQ or Rigetti devices for \\(n=4\\)–\\(8\\) simplices, comparing Betti number estimates to classical TDA tools (e.g., GUDHI). \n",
+ "3. **Algorithm Refinement**: \n",
+ " - Develop automated ansatz design tools (e.g., genetic algorithms) and noise-adaptive optimizers. \n",
+ "\n",
+ "---\n",
+ "\n",
+ "## Conclusion \n",
+ "By replacing QPE with VQE/VQD, we create a **NISQ-friendly quantum TDA pipeline** that balances accuracy and practicality. While variational methods introduce approximations, their compatibility with near-term hardware enables real-world applications in finance, biology, and materials science. Future work should focus on optimizing ansatz design and integrating error mitigation to achieve quantum advantage in topological data analysis. \n",
+ "\n"
+ ]
+ },
{
"cell_type": "code",
"execution_count": null,
@@ -624,7 +1215,49 @@
"metadata": {},
"outputs": [],
"source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "!pip install qiskit-terra # Install qiskit-terra\n",
+ "!pip install qiskit[visualization] # Install visualization tools, if you need them\n",
+ "\n",
+ "# Restart runtime here (Runtime -> Restart runtime)\n",
+ "\n",
+ "import qiskit.algorithms\n",
+ "print(qiskit.algorithms.__version__) # Check version. Should be 0.24.0 or higher\n",
+ "\n",
+ "from qiskit.algorithms import VQE\n",
+ "from qiskit.algorithms.optimizers import COBYLA\n",
+ "from qiskit.circuit.library import TwoLocal\n",
+ "from qiskit.opflow import PauliSumOp # Or SparsePauliOp\n",
+ "from qiskit.quantum_info import SparsePauliOp\n",
+ "from qiskit.primitives import Sampler\n",
+ "from qiskit import Aer\n",
+ "\n",
+ "# ... (rest of your VQE code as before, replacing the placeholder for the Laplacian)\n",
+ "\n",
+ "# Example Laplacian (replace with your actual Laplacian):\n",
+ "pauli_list = [\n",
+ " (\"ZZII\", 0.25),\n",
+ " (\"IIZZ\", 0.25),\n",
+ " (\"ZIZI\", -0.1),\n",
+ " (\"IXZX\", -0.1),\n",
+ "]\n",
+ "pauli_laplacian = SparsePauliOp.from_list(pauli_list)\n",
+ "\n",
+ "\n",
+ "num_qubits = 4\n",
+ "ansatz = TwoLocal(num_qubits=num_qubits, rotation_blocks=[\"rx\", \"rz\"], entanglement_blocks=\"cz\", reps=2)\n",
+ "optimizer = COBYLA(maxiter=100)\n",
+ "\n",
+ "backend = Aer.get_backend('statevector_simulator')\n",
+ "sampler = Sampler(backend=backend)\n",
+ "\n",
+ "vqe = VQE(sampler=sampler, ansatz=ansatz, optimizer=optimizer)\n",
+ "\n",
+ "result = vqe.compute_minimum_eigenvalue(pauli_laplacian)\n",
+ "\n",
+ "print(\"Estimated smallest eigenvalue:\", result.eigenvalue)\n",
+ "print(\"Optimal parameters:\", result.optimal_parameters)"
]
},
{
@@ -638,9 +1271,9 @@
],
"metadata": {
"kernelspec": {
- "display_name": "tda",
+ "display_name": "Python 3 [moodys]",
"language": "python",
- "name": "python3"
+ "name": "python3_moodys_yhl12u"
},
"language_info": {
"codemirror_mode": {
@@ -652,7 +1285,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.11.9"
}
},
"nbformat": 4,