From f9a834debc58f4036669b802ef65e88eab52de37 Mon Sep 17 00:00:00 2001
From: alexhold5 <87720137+alexhold5@users.noreply.github.com>
Date: Sun, 2 Feb 2025 10:23:32 -0500
Subject: [PATCH] Add files via upload

---
 moodys_challenge.ipynb | 695 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 664 insertions(+), 31 deletions(-)

diff --git a/moodys_challenge.ipynb b/moodys_challenge.ipynb
index 7ee0e87..91c9f70 100644
--- a/moodys_challenge.ipynb
+++ b/moodys_challenge.ipynb
@@ -161,15 +161,27 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
 "id": "4ec880cd",
- "metadata": {},
- "outputs": [],
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "done\n"
+ ]
+ }
+ ],
 "source": [
+ "# load the S&P 500 closing prices and work with the log of the series\n",
+ "\n",
 "import pandas as pd\n",
 "import numpy as np\n",
 "\n",
- "time_series = np.log(pd.read_csv(\"SP500.csv\", header=None).to_numpy().squeeze())"
+ "time_series = np.log(pd.read_csv(\"sp500.csv\", header=None).to_numpy().squeeze())"
 ]
 },
 {
@@ -209,36 +221,93 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
 "id": "7b701be6",
 "metadata": {},
 "outputs": [],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "\n",
+ "def takens_embedding(time_series, N, d):\n",
+ "    \"\"\"\n",
+ "    We use Takens's embedding theorem, which states that we can\n",
+ "    reconstruct the topology and dynamical behavior of the global system\n",
+ "    from N-dimensional vectors of data points delayed by d steps.\n",
+ "    \"\"\"\n",
+ "    L = len(time_series)\n",
+ "    if L < (N - 1) * d + 1:\n",
+ "        raise ValueError(\"Time series is too short for given N and d.\")\n",
+ "    # list comprehension to build the delay-embedded vectors\n",
+ "    return np.array([time_series[i : i + (N - 1) * d + 1 : d] for i in range(L - (N - 1) * d)])\n",
+ "\n",
+ "\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 3,
 "id": "e0140903-54bb-42bd-9df1-ace45517b265",
 "metadata": {},
 "outputs": [],
 "source": [
 "def sliding_window(embedded_vectors, w):\n",
 "    \"\"\"\n",
 "    This returns a set of w-sized sequences of embedded vectors.\n",
 "    In total, this acts like a sliding window over the behavior of the system.\n",
 "    \"\"\"\n",
 "    M = embedded_vectors.shape[0]\n",
 "    K = M - w + 1  # Number of windows\n",
 "\n",
 "    if K <= 0:\n",
 "        raise ValueError(\"Window size w is too large for the given data.\")\n",
 "    # list comprehension to divide the previous 2-d array into submatrices\n",
 "    return np.array([embedded_vectors[i : i + w] for i in range(K)])\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": 4,
 "id": "95c41a2c-23b4-412e-b4de-1b0f8f0758d5",
 "metadata": {},
 "outputs": [
 {
 "name": "stdout",
 "output_type": "stream",
 "text": [
 "Shape of embedded vectors: (81, 3)\n",
 "Shape of point cloud: (77, 5, 3)\n"
 ]
 }
 ],
 "source": [
 "# embedding dimension\n",
 "N = 3\n",
 "# time delay (1, since the data are sampled at evenly spaced discrete steps)\n",
 "d = 1\n",
 "# window length\n",
 "w = 5\n",
 "\n",
 "embedded_vectors = takens_embedding(time_series, N, d)\n",
 "\n",
 "# we construct our point cloud via this sliding window\n",
 "point_cloud = sliding_window(embedded_vectors, w)\n",
 "\n",
 "print(\"Shape of embedded vectors:\", embedded_vectors.shape)\n",
 "print(\"Shape of point cloud:\", point_cloud.shape)\n"
 ]
 },
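 {
 "cell_type": "code",
 "execution_count": null,
 "id": "takens-sanity-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): a minimal sanity\n",
 "# check of takens_embedding on a toy series whose delay vectors can be\n",
 "# written out by hand, before trusting it on the S&P 500 data.\n",
 "toy = np.arange(6)\n",
 "expected = np.array([[0, 1, 2], [1, 2, 3], [2, 3, 4], [3, 4, 5]])\n",
 "assert np.array_equal(takens_embedding(toy, 3, 1), expected)\n",
 "# with delay d=2 the two rows are [0, 2, 4] and [1, 3, 5]\n",
 "assert takens_embedding(toy, 3, 2).shape == (2, 3)\n",
 "print(\"takens_embedding sanity checks passed\")\n"
 ]
 },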
\n", - " \n", - "BONUS EXERCISE: \n", + "def sliding_window(embedded_vectors, w):\n", + " \"\"\"\n", + " This returns a set of w-sized sequences of embedded vectors.\n", + " In total, this acts like a sliding window of the behavior of the system.\n", + " \"\"\"\n", + " M = embedded_vectors.shape[0]\n", + " K = M - w + 1 # Number of windows\n", + "\n", + " if K <= 0:\n", + " raise ValueError(\"Window size w is too large for the given data.\")\n", + " # list comprehension to divide the previous 2-d array into submatrices\n", + " return np.array([embedded_vectors[i : i + w] for i in range(K)])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "95c41a2c-23b4-412e-b4de-1b0f8f0758d5", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shape of embedded vectors: (81, 3)\n", + "Shape of point cloud: (77, 5, 3)\n" + ] + } + ], + "source": [ + "#embedding dimension (dimension of simplex)\n", + "N = 3\n", + "#time delay (1 cause discrete)\n", + "d = 1\n", + "#dimension of window\n", + "w = 5\n", "\n", - "`gtda.time_series.TakensEmbedding` can conduct this transformation. Try avoid using this function and build your own embedding function.\n", + "embedded_vectors = takens_embedding(time_series, N, d)\n", "\n", - "
" + "# we construct our point cloude via this sliding window\n", + "point_cloud = sliding_window(embedded_vectors, w)\n", + "\n", + "print(\"Shape of embedded vectors:\", embedded_vectors.shape)\n", + "print(\"Shape of point cloud:\", point_cloud.shape)\n" ] }, { "cell_type": "markdown", - "id": "9780b90c", + "id": "9376bd0b", "metadata": {}, "source": [ - "
 "NOTE:\n",
 "\n",
 "From step 2, we present an example originally provided in the appendix of [Khandelwal and Chandra's paper](https://arxiv.org/abs/2302.09553). This example can be used to verify your code.\n"
 ]
 },
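 {
 "cell_type": "code",
 "execution_count": null,
 "id": "periodic-embedding-illustration-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not the paper's appendix example, which is not reproduced\n",
 "# here): a qualitative check that the embedding behaves as the theory says.\n",
 "# A periodic signal should trace a closed loop after Takens embedding, which\n",
 "# is exactly the kind of feature the persistence pipeline below detects.\n",
 "theta = np.linspace(0, 4 * np.pi, 200)  # two full periods, ~100 samples each\n",
 "loop = takens_embedding(np.sin(theta), 2, 25)  # delay of ~ a quarter period\n",
 "# each embedded point is then close to (sin t, cos t), i.e. a circle\n",
 "radii = np.linalg.norm(loop - loop.mean(axis=0), axis=1)\n",
 "print(\"embedded loop shape:\", loop.shape)\n",
 "print(\"spread of radii (small for a clean loop):\", np.ptp(radii))\n"
 ]
 },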
\n", + " \n", + "BONUS EXERCISE: \n", "\n", - "From step 2, we present an example originally provided in the appendix of [Khandelwal's and Chandra's paper](https://arxiv.org/abs/2302.09553). This example can be used to verify your code.\n", + "`gtda.time_series.TakensEmbedding` can conduct this transformation. Try avoid using this function and build your own embedding function.\n", "\n", "
" ] @@ -273,12 +342,68 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "026d6b61", + "execution_count": 5, + "id": "efcda15b-3685-431a-91de-cb1929743103", "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "def sort_simplices(input_list):\n", + " \"\"\"\n", + " Organizes a list of simplices into groups based on dimension.\n", + " \n", + " Args:\n", + " - input_list (list of lists): A list containing simplices of different dimensions.\n", + " \n", + " Returns:\n", + " - A nested list: [[0-simplices], [1-simplices], [2-simplices], ...]\n", + " \"\"\"\n", + " # Sort elements into categories based on length\n", + " sorted_simplices = {}\n", + " \n", + " for simplex in input_list:\n", + " key = len(simplex) - 1 # Dimension of the simplex (0-based)\n", + " simplex_sorted = sorted(simplex) # Ensure order consistency\n", + " \n", + " if key not in sorted_simplices:\n", + " sorted_simplices[key] = []\n", + " \n", + " sorted_simplices[key].append(simplex_sorted)\n", + "\n", + " # Convert the dictionary into a list of lists, ensuring correct order\n", + " return [sorted_simplices.get(dim, []) for dim in sorted(sorted_simplices.keys())]" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "026d6b61", + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "done\n" + ] + } + ], + "source": [ + "# write your code here\n", + "import gudhi\n", + "\n", + "point_cloud = point_cloud.reshape(point_cloud.shape[0], -1)\n", + "\n", + "epsilon = 12\n", + "rips_complex = gudhi.RipsComplex(points=point_cloud, max_edge_length = epsilon)\n", + "unfilt_simplex_tree = rips_complex.create_simplex_tree(max_dimension=2)\n", + "filt_simplex_tree = list(unfilt_simplex_tree.get_filtration())\n", + "simplex_tree = []\n", + "for filtered_value in filt_simplex_tree:\n", + " simplex_tree.append(filtered_value[0])\n", + "\n", + "simplex_tree = sort_simplices(simplex_tree)" ] }, { @@ -337,12 +462,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "id": "246e41ce", "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "# write your code here\n", + "import numpy as np\n", + "def generate_boundary_operator(simplex_tree, k):\n", + " num_k_simplicies = len(simplex_tree[k])\n", + " num_k1_simplicies = len(simplex_tree[k-1])\n", + " op = np.zeros((num_k_simplicies, num_k1_simplicies), dtype=int)\n", + " for i in range(num_k_simplicies):\n", + " simplex = simplex_tree[k][i]\n", + " row = [0] * num_k1_simplicies\n", + " sub_simplicies = [simplex[:j] + simplex[j + 1:] for j in range(len(simplex))]\n", + " sub_simplicies.sort()\n", + " for z,sub in enumerate(sub_simplicies):\n", + " index = simplex_tree[k-1].index(sub)\n", + " op[i,index] = (-1)**z\n", + "\n", + " return op.T" ] }, { @@ -423,12 +563,128 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "8f818f78", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Unitary Matrix:\n", + " [[-0.03097503-0.99952016j 0. +0.j 0. +0.j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j -0.03097503-0.99952016j 0. +0.j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j 0. +0.j -0.03097503-0.99952016j\n", + " ... 0. +0.j 0. +0.j\n", + " 0. +0.j ]\n", + " ...\n", + " [ 0. +0.j 0. +0.j 0. +0.j\n", + " ... 0.95507364-0.29636858j 0. +0.j\n", + " 0. +0.j ]\n", + " [ 0. +0.j 0. +0.j 0. +0.j\n", + " ... 0. 
@@ -423,12 +563,128 @@
 },
 {
 "cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
 "id": "8f818f78",
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Unitary Matrix:\n",
+ " [[-0.03097503-0.99952016j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j -0.03097503-0.99952016j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j -0.03097503-0.99952016j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " ...\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0.95507364-0.29636858j 0. +0.j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0.95507364-0.29636858j\n",
+ " 0. +0.j ]\n",
+ " [ 0. +0.j 0. +0.j 0. +0.j\n",
+ " ... 0. +0.j 0. +0.j\n",
+ " 0.95507364-0.29636858j]]\n",
+ "Is Unitary: True\n",
+ "Instruction(name='unitary', num_qubits=12, num_clbits=0, params=[array([[-0.03097503-0.99952016j, 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , -0.03097503-0.99952016j,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " -0.03097503-0.99952016j, ..., 0. +0.j ,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " ...,\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0.95507364-0.29636858j,\n",
+ " 0. +0.j , 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0.95507364-0.29636858j, 0. +0.j ],\n",
+ " [ 0. +0.j , 0. +0.j ,\n",
+ " 0. +0.j , ..., 0. +0.j ,\n",
+ " 0. +0.j , 0.95507364-0.29636858j]])])\n"
+ ]
+ }
+ ],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "import numpy as np\n",
+ "import scipy.linalg\n",
+ "from scipy.sparse import csr_matrix\n",
+ "\n",
+ "def need_padding(laplacian: np.ndarray):\n",
+ "    size = laplacian.shape[0]\n",
+ "    if (size & (size - 1)) == 0:\n",
+ "        return False, size\n",
+ "\n",
+ "    next_power_of_2 = 2**(size - 1).bit_length()\n",
+ "    return True, next_power_of_2\n",
+ "\n",
+ "def build_laplacian(simplex_tree):\n",
+ "    # convert the boundary operators to sparse for a matrix multiplication speedup\n",
+ "    d1_sparse = csr_matrix(generate_boundary_operator(simplex_tree, 1))\n",
+ "    d2_sparse = csr_matrix(generate_boundary_operator(simplex_tree, 2))\n",
+ "\n",
+ "    # k = 1 combinatorial Laplacian\n",
+ "    laplacian = (d1_sparse.conj().T @ d1_sparse + d2_sparse @ d2_sparse.conj().T).toarray().astype(float)\n",
+ "    padding_bool, size = need_padding(laplacian)\n",
+ "\n",
+ "    if padding_bool:\n",
+ "        # Gershgorin circle bound on the largest eigenvalue\n",
+ "        n = laplacian.shape[0]\n",
+ "        max_bound = -np.inf\n",
+ "        for i in range(n):\n",
+ "            center = laplacian[i, i]\n",
+ "            radius = sum(abs(laplacian[i, j]) for j in range(n) if j != i)\n",
+ "            max_bound = max(max_bound, center + radius)\n",
+ "\n",
+ "        padded_laplacian = np.pad(laplacian, (0, size - laplacian.shape[0]))\n",
+ "\n",
+ "        # fill the padded diagonal with a nonzero value so the padding does not\n",
+ "        # enlarge the kernel (and therefore does not inflate the Betti count)\n",
+ "        for i in range(laplacian.shape[0], size):\n",
+ "            padded_laplacian[i, i] = max_bound / 2\n",
+ "\n",
+ "        # scaling_factor = (2 * math.pi) / max_bound could be applied here to keep\n",
+ "        # the spectrum below 2*pi before exponentiation (see the sketch below)\n",
+ "        return padded_laplacian\n",
+ "\n",
+ "    return laplacian\n",
+ "\n",
+ "from qiskit.circuit.library import UnitaryGate\n",
+ "def convert_to_unitary(laplacian):\n",
+ "    \"\"\"\n",
+ "    Convert the Laplacian into a unitary matrix using matrix exponentiation.\n",
+ "    \"\"\"\n",
+ "    # Ensure the Laplacian is exactly symmetric (Hermitian) before exponentiation\n",
+ "    laplacian = 0.5 * (laplacian + laplacian.T)\n",
+ "\n",
+ "    # U = exp(-iH) is unitary whenever H is Hermitian\n",
+ "    iH = -1j * laplacian\n",
+ "    U = scipy.linalg.expm(iH)\n",
+ "\n",
+ "    # Check unitarity\n",
+ "    identity_check = np.allclose(U @ U.conj().T, np.eye(U.shape[0]), atol=1e-8)\n",
+ "    if not identity_check:\n",
+ "        raise ValueError(\"Error: Resulting matrix is not unitary\")\n",
+ "\n",
+ "    print(\"Unitary Matrix:\\n\", U)  # Debugging Output\n",
+ "    print(\"Is Unitary:\", identity_check)  # Should be True\n",
+ "\n",
+ "    return UnitaryGate(U)\n",
+ "\n",
+ "I = build_laplacian(simplex_tree)\n",
+ "H = convert_to_unitary(I)\n",
+ "print(H)"
 ]
 },
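 {
 "cell_type": "code",
 "execution_count": null,
 "id": "spectrum-scaling-sketch-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission), expanding on the\n",
 "# scaling_factor left commented out above: exp(-iH) only resolves eigenvalues\n",
 "# modulo 2*pi, so any eigenvalue of the padded Laplacian at or above 2*pi\n",
 "# aliases onto a small phase and could be mistaken for a kernel (Betti)\n",
 "# eigenvalue. Rescaling keeps the spectrum inside [0, 2*pi) while leaving the\n",
 "# zero eigenvalues, the ones we count, exactly at zero. The 0.99 margin is an\n",
 "# arbitrary safety factor.\n",
 "lam = np.linalg.eigvalsh(I)\n",
 "print(\"spectrum before scaling:\", lam.min(), lam.max())\n",
 "scale = (2 * np.pi * 0.99) / lam.max()\n",
 "print(\"largest eigenvalue after scaling:\", (scale * lam).max())\n"
 ]
 },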
@@ -502,9 +758,55 @@
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "848ef40c",
 "metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "AB\n",
+ "0\n"
+ ]
+ }
+ ],
 "source": [
- "# write your code here"
+ "# write your code here\n",
+ "\n",
+ "from qiskit import QuantumCircuit\n",
+ "from qiskit.quantum_info import Statevector\n",
+ "from qiskit_aer import Aer\n",
+ "from qiskit.circuit.library import PhaseEstimation\n",
+ "\n",
+ "def run_qpe(H, qpe_qubits=3):\n",
+ "    \"\"\"\n",
+ "    Runs QPE to estimate eigenvalues of H.\n",
+ "\n",
+ "    Args:\n",
+ "        H: Unitary gate implementing exp(-i * Laplacian).\n",
+ "        qpe_qubits: Number of phase estimation qubits.\n",
+ "\n",
+ "    Returns:\n",
+ "        Probability of measuring all-zero evaluation qubits, p(0).\n",
+ "    \"\"\"\n",
+ "    # PhaseEstimation expects the unitary as a circuit, so wrap the gate\n",
+ "    unitary_circuit = QuantumCircuit(H.num_qubits)\n",
+ "    unitary_circuit.append(H, range(H.num_qubits))\n",
+ "\n",
+ "    # Build the QPE circuit (PhaseEstimation is itself a QuantumCircuit)\n",
+ "    circuit = PhaseEstimation(num_evaluation_qubits=qpe_qubits, unitary=unitary_circuit)\n",
+ "\n",
+ "    # Simulate\n",
+ "    simulator = Aer.get_backend('statevector_simulator')\n",
+ "    result = simulator.run(circuit).result()\n",
+ "    statevector = Statevector(result.get_statevector())\n",
+ "\n",
+ "    # Marginal probability that the evaluation qubits (the first qpe_qubits\n",
+ "    # in the register) all read zero\n",
+ "    p_zero = statevector.probabilities(qargs=list(range(qpe_qubits)))[0]\n",
+ "\n",
+ "    return p_zero\n",
+ "\n",
+ "# Example usage:\n",
+ "p_zero = run_qpe(H)\n",
+ "betti_estimate = int(round((2 ** 3) * p_zero))  # beta_k = 2^q * p(0)"
 ]
 },
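 {
 "cell_type": "code",
 "execution_count": null,
 "id": "classical-betti-cross-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): beta_k equals the\n",
 "# dimension of the kernel of the k-th combinatorial Laplacian, so on an\n",
 "# instance this small the QPE estimate above can be cross-checked by counting\n",
 "# (near-)zero eigenvalues directly. The padded diagonal entries are nonzero,\n",
 "# so the padding adds no kernel vectors; the 1e-8 tolerance is an assumption.\n",
 "eigs = np.linalg.eigvalsh(build_laplacian(simplex_tree))\n",
 "beta_1_classical = int(np.sum(np.isclose(eigs, 0.0, atol=1e-8)))\n",
 "print(\"classical beta_1 (null-space dimension):\", beta_1_classical)\n"
 ]
 },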
@@ -560,7 +862,7 @@
 },
 {
 "cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
 "id": "5ef68f98",
 "metadata": {},
 "outputs": [],
 "source": [
 "# write your code here"
 ]
 },
@@ -583,6 +885,204 @@
 "# write your code here"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "665df20a-1f52-415b-ac9b-4483cae32836",
 "metadata": {},
 "outputs": [],
 "source": [
 "from scipy.stats import genextreme\n",
 "\n",
 "# placeholder Betti numbers (substitute the estimates from the pipeline above)\n",
 "betti_numbers = np.array([12, 14, 13, 15, 16, 14, 10, 5, 3])\n",
 "\n",
 "# first differences D_i = x_i - x_{i-1}\n",
 "D = np.diff(betti_numbers)\n",
 "\n",
 "print(\"Differences D_i:\", D)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "d6252f18-1fb7-41ef-8e06-2483c51ba22a",
 "metadata": {},
 "outputs": [],
 "source": [
 "def newey_west_variance(D, max_lag=None):\n",
 "    \"\"\"\n",
 "    The differences D_i are NOT independent, so the ordinary i.i.d. variance\n",
 "    estimator understates the uncertainty; the Newey-West estimator corrects\n",
 "    the variance for autocorrelation in the sequence.\n",
 "\n",
 "    Parameters:\n",
 "        D: array of differences.\n",
 "        max_lag (int): Maximum lag order.\n",
 "\n",
 "    Returns:\n",
 "        float: Newey-West standard deviation\n",
 "    \"\"\"\n",
 "    if max_lag is None:\n",
 "        max_lag = int(4 * (len(D) / 100) ** (2 / 9))  # Rule-of-thumb bandwidth\n",
 "\n",
 "    # Compute sample mean\n",
 "    D_mean = np.mean(D)\n",
 "\n",
 "    # Compute autocovariances\n",
 "    n = len(D)\n",
 "    gamma = np.array([\n",
 "        np.sum((D[:n - j] - D_mean) * (D[j:] - D_mean)) / n\n",
 "        for j in range(max_lag + 1)\n",
 "    ])\n",
 "\n",
 "    # Bartlett weights for Newey-West estimator\n",
 "    weights = 1 - np.arange(1, max_lag + 1) / (max_lag + 1)\n",
 "\n",
 "    # Newey-West variance formula\n",
 "    nw_var = gamma[0] + 2 * np.sum(weights * gamma[1:])\n",
 "\n",
 "    return np.sqrt(nw_var)  # Convert variance to standard deviation\n",
 "\n",
 "nw_std = newey_west_variance(D)\n",
 "print(\"Newey-West Adjusted Standard Deviation:\", nw_std)\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "df265c44-4a1d-468e-ba8a-3d7e90433cc1",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Two candidate thresholds: a crude two-standard-deviation cut from the raw\n",
 "# data, and the autocorrelation-robust Newey-West standard deviation, which\n",
 "# is the one actually used for the split below\n",
 "delta = 2 * np.std(D)\n",
 "\n",
 "nw_std = newey_west_variance(D)\n",
 "delta_adjusted = nw_std\n",
 "\n",
 "print(f\"this is D: {D}\")\n",
 "\n",
 "# Split data into null and alternative distributions\n",
 "D_H0 = D[D >= -delta_adjusted]\n",
 "D_HA = D[D < -delta_adjusted]\n",
 "\n",
 "\n",
 "print(f\"this is delta: {delta}, this is Newey-West SD: {nw_std}, this is the adjusted threshold: {delta_adjusted}, this is the null hypothesis data: {D_H0}, this is the alternative data: {D_HA}\")\n"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "df2b486c-eefa-49fb-a253-f6d4dabaf8d8",
 "metadata": {},
 "outputs": [],
 "source": [
 "# fit a Fréchet distribution: negate the data, fit a GEV, then flip the\n",
 "# shape parameter back\n",
 "def fit_frechet(data):\n",
 "    shape, loc, scale = genextreme.fit(-data)\n",
 "    return -shape, loc, scale\n",
 "\n",
 "params_H0 = fit_frechet(D_H0)\n",
 "params_HA = fit_frechet(D_HA)\n",
 "\n",
 "print(\"H0 Parameters (shape, loc, scale):\", params_H0)\n",
 "print(\"HA Parameters (shape, loc, scale):\", params_HA)"
 ]
 },
 {
 "cell_type": "code",
 "execution_count": null,
 "id": "cd1c5f2f-530f-4e8a-8c61-e17ab1c979df",
 "metadata": {},
 "outputs": [],
 "source": [
 "# this is the function that computes our log-likelihood ratio\n",
 "def log_likelihood_ratio(data, params_H0, params_HA):\n",
 "    shape_H0, loc_H0, scale_H0 = params_H0\n",
 "    shape_HA, loc_HA, scale_HA = params_HA\n",
 "\n",
 "    logpdf_H0 = genextreme.logpdf(-data, -shape_H0, loc=loc_H0, scale=scale_H0)\n",
 "    logpdf_HA = genextreme.logpdf(-data, -shape_HA, loc=loc_HA, scale=scale_HA)\n",
 "\n",
 "    print(f\"logpdf_H0: {logpdf_H0}\")\n",
 "    print(f\"logpdf_HA: {logpdf_HA}\")\n",
 "\n",
 "    if np.any(np.isnan(logpdf_H0)) or np.any(np.isnan(logpdf_HA)):\n",
 "        print(\"WARNING: NaN values found in logpdf!\")\n",
 "    if np.any(np.isinf(logpdf_H0)) or np.any(np.isinf(logpdf_HA)):\n",
 "        print(\"WARNING: Inf values found in logpdf!\")\n",
 "\n",
 "    log_likelihood_H0 = np.sum(logpdf_H0)\n",
 "    log_likelihood_HA = np.sum(logpdf_HA)\n",
 "\n",
 "    print(f\"log_likelihood_H0: {log_likelihood_H0}\")\n",
 "    print(f\"log_likelihood_HA: {log_likelihood_HA}\")\n",
 "\n",
 "    return log_likelihood_HA - log_likelihood_H0"
 ]
 },
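 {
 "cell_type": "code",
 "execution_count": null,
 "id": "frechet-fit-sanity-check-added",
 "metadata": {},
 "outputs": [],
 "source": [
 "# Editor's sketch (not part of the original submission): a quick check of the\n",
 "# sign conventions in fit_frechet. We draw from a negated GEV with known\n",
 "# parameters and refit; the recovered values should sit near the generating\n",
 "# ones. The sample size and seed are arbitrary choices.\n",
 "synthetic = -genextreme.rvs(-0.5, loc=0.0, scale=1.0, size=5000, random_state=0)\n",
 "print(\"generating (shape, loc, scale): (0.5, 0.0, 1.0)\")\n",
 "print(\"recovered:\", fit_frechet(synthetic))\n"
 ]
 },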
"execution_count": null, + "id": "289d3436-1728-44e9-9a5d-98e88bf5ef26", + "metadata": {}, + "outputs": [], + "source": [ + "#this is where we do our Monte Carlo simulation\n", + "def simulate_frechet(n, shape, loc, scale, num_simulations=1000):\n", + " return -genextreme.rvs(-shape, loc=loc, scale=scale, size=(num_simulations, n))\n", + "\n", + "# Simulate under H0\n", + "simulated_D = simulate_frechet(len(D), *params_H0, num_simulations=1000)\n", + "\n", + "# Compute log-likelihood ratios for each simulated sequence\n", + "simulated_LLRs = np.array([log_likelihood_ratio(seq, params_H0, params_HA) for seq in simulated_D])\n", + "\n", + "# 1st percentile for critical value\n", + "c = np.percentile(simulated_LLRs, 1)\n", + "\n", + "print(\"Critical value c:\", c)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c25ef484-3b27-4313-b59a-85fb7a0d7d76", + "metadata": {}, + "outputs": [], + "source": [ + "def sequential_likelihood_ratio_test(D, params_H0, params_HA, c):\n", + " \"\"\"\n", + " we now run the sequential likelihood ratio, checking each x_i wrt x_i-1\n", + " we accept sequentially, but if we ever find one x_i such that we are below the\n", + " threshold, we immediately reject, as this signals a crash in the market\n", + " \"\"\"\n", + " shape_H0, loc_H0, scale_H0 = params_H0\n", + " shape_HA, loc_HA, scale_HA = params_HA\n", + "\n", + " for d in D:\n", + " llr = genextreme.logpdf(-d, -shape_HA, loc=loc_HA, scale=scale_HA) - \\\n", + " genextreme.logpdf(-d, -shape_H0, loc=loc_H0, scale=scale_H0)\n", + "\n", + " if llr < c:\n", + " return True # Crash detected\n", + "\n", + " return False # No crash detected\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2059cf3e-0c57-42a0-af58-5576effc09e6", + "metadata": {}, + "outputs": [], + "source": [ + "crash_happened = sequential_likelihood_ratio_test(D, params_H0, params_HA, c)\n", + "print(\"Crash detected:\", crash_happened)" + ] + }, { "cell_type": "markdown", "id": "d1c2e4b5", @@ -617,6 +1117,97 @@ " Implement a quantum TDA algorithm for persistent Betti numbers. Esstimating the persistent Betti numbers is a more general task than estimating the Betti number and it is more practical for TDA. It is an open problem to construct a quantum algorithm for the persistent Betti numbers in a way that is preferable for NISQ devices, and the only current implementation of a quantum algorithm for persistent betti number is shown [here](https://quantum-journal.org/papers/q-2022-12-07-873/pdf/)." ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "d278e7af-b0c9-4db2-bd1e-3916f644a0bb", + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "# Answer to the Bonus question: Future Directions for Quantum TDA: A Hybrid NISQ-Friendly Pipeline \n", + "To address the limitations of Quantum Phase Estimation (QPE) in practical quantum TDA pipelines, we propose replacing QPE with variational algorithms such as the **Variational Quantum Eigensolver (VQE)** and **Variational Quantum Deflation (VQD)**. These methods are better suited for NISQ devices due to their shorter circuit depths and hybrid quantum-classical workflows. Below is a structured approach to building a hybrid pipeline and benchmarking its performance.\n", + "\n", + "---\n", + "\n", + "## 1. 
Proposed Hybrid Pipeline \n", + "### Step 1: Hamiltonian Encoding \n", + "- **Combinatorial Laplacian as a Hamiltonian**: \n", + " Represent the combinatorial Laplacian \\(\\Delta_k\\) (used to compute Betti numbers) as a sum of Pauli operators. This involves decomposing \\(\\Delta_k\\) into tensor products of Pauli matrices (\\(X, Y, Z, I\\)), similar to the adjacency/Laplacian matrix encoding in the provided paper. \n", + " - Example: For a simplicial complex with \\(n\\) simplices, pad \\(\\Delta_k\\) to size \\(2^{\\lceil \\log_2 n \\rceil} \\times 2^{\\lceil \\log_2 n \\rceil}\\) and map it to qubit operators.\n", + "\n", + "### Step 2: Ansatz Design \n", + "- **Adaptive Ansatz for TDA**: \n", + " Use a parameterized quantum circuit (ansatz) tailored for graph/simplicial complex structures. Examples include: \n", + " - **Layered Hardware-Efficient Ansatz**: Alternating layers of single-qubit rotations (\\(R_X, R_Z\\)) and entangling gates (e.g., CNOT or CZ). \n", + " - **Graph-Inspired Ansatz**: Incorporate connectivity patterns from the simplicial complex into the ansatz topology. \n", + " - **Symmetry-Preserving Ansatz**: Exploit the symmetry of \\(\\Delta_k\\) (e.g., sparsity or block structure) to reduce parameter counts.\n", + "\n", + "### Step 3: Optimization Loop \n", + "- **VQE for Ground State**: \n", + " Minimize \\(\\langle \\psi(\\theta) | \\Delta_k | \\psi(\\theta) \\rangle\\) to estimate the smallest eigenvalue (closest to zero). \n", + "- **VQD for Excited States**: \n", + " Iteratively deflate \\(\\Delta_k\\) to estimate higher eigenvalues: \n", + " 1. After finding the ground state \\(|\\psi_0\\rangle\\), modify \\(\\Delta_k \\rightarrow \\Delta_k - \\lambda_0 |\\psi_0\\rangle\\langle\\psi_0|\\). \n", + " 2. Repeat VQE on the deflated Hamiltonian to find the next eigenvalue. \n", + "\n", + "### Step 4: Noise Mitigation \n", + "- **Error Suppression**: \n", + " Use techniques like zero-noise extrapolation (ZNE), dynamical decoupling, or measurement error mitigation to improve eigenvalue estimates. \n", + "- **Noise-Adaptive Ansatz**: \n", + " Design ansätze with reduced gate counts or tailored to device noise profiles (e.g., avoiding fragile two-qubit gates).\n", + "\n", + "---\n", + "\n", + "## 2. Benchmarking Framework \n", + "Compare the hybrid pipeline against textbook QPE across key metrics: \n", + "\n", + "| **Metric** | **VQE/VQD** | **QPE** | \n", + "|-------------------------|---------------------------------------|---------------------------------------| \n", + "| Circuit Depth | Shallow (polynomial in \\(n\\)) | Deep (exponential in precision \\(q\\)) | \n", + "| Qubit Count | \\(O(\\log n)\\) | \\(O(\\log n + q)\\) | \n", + "| Classical Cost | Iterative optimization (\\(O(\\text{poly}(n))\\)) | Post-processing (\\(O(2^q)\\)) | \n", + "| Noise Resilience | Robust (short circuits) | Fragile (long coherence required) | \n", + "| Eigenvalue Accuracy | Approximate (\\(\\pm \\epsilon\\)) | Exact (up to precision \\(q\\)) | \n", + "\n", + "### Key Experiments \n", + "1. **Simulation**: \n", + " - Use quantum simulators (e.g., Qiskit Aer) to compare accuracy and convergence rates for small simplicial complexes (\\(n \\leq 10\\)). \n", + "2. **Hardware Runs**: \n", + " - Deploy on NISQ devices (e.g., IBMQ, Rigetti) to test noise resilience and runtime. \n", + "3. **Scaling Tests**: \n", + " - Measure runtime and error growth as \\(n\\) increases, comparing empirical results to theoretical predictions (e.g., \\(O(n^2)\\) vs. \\(O(2^n)\\)). 
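\n",
 "\n",
 "**Editor's addition (hedged sketch):** the deflation in Step 3 can be prototyped classically before any quantum resources are involved. The toy matrix, the shift size, and the exact eigensolver below are illustrative assumptions; VQD itself realizes the same idea through overlap penalties measured on hardware.\n",
 "\n",
 "```python\n",
 "import numpy as np\n",
 "\n",
 "H_toy = np.diag([0.0, 1.0, 2.0, 3.0])  # stand-in for a small combinatorial Laplacian\n",
 "w, v = np.linalg.eigh(H_toy)\n",
 "lam0, psi0 = w[0], v[:, 0]  # the 'VQE' ground pair\n",
 "\n",
 "# deflate: push the found eigenstate above the rest of the spectrum, so the\n",
 "# next minimization returns the second eigenvalue of the original matrix\n",
 "shift = (w[-1] - w[0]) + 1.0\n",
 "H_deflated = H_toy + shift * np.outer(psi0, psi0)\n",
 "lam1 = np.linalg.eigh(H_deflated)[0][0]\n",
 "print(lam0, lam1)  # 0.0 and 1.0 for this toy spectrum\n",
 "```\n",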
\n", + "\n", + "---\n", + "\n", + "## 3. Advantages and Challenges \n", + "### Advantages \n", + "- **NISQ Compatibility**: Shallow circuits reduce sensitivity to decoherence. \n", + "- **Flexibility**: Ansatz and optimizer can be tailored to specific problem structures. \n", + "- **Cost-Efficiency**: Classical optimization leverages existing HPC resources. \n", + "\n", + "### Challenges \n", + "- **Ansatz Expressivity**: Poorly chosen ansätze may fail to capture eigenstates. \n", + "- **Optimization Traps**: Classical optimizers (e.g., COBYLA, SPSA) may converge to local minima. \n", + "- **Noise Propagation**: Errors in eigenvalue estimates compound during deflation (VQD). \n", + "\n", + "---\n", + "\n", + "## 4. Implementation Roadmap \n", + "1. **Codebase Integration**: \n", + " - Extend existing quantum TDA libraries (e.g., Qiskit, Pennylane) to support combinatorial Laplacians and variational algorithms. \n", + "2. **Hardware Experiments**: \n", + " - Run on IBMQ or Rigetti devices for \\(n=4\\)–\\(8\\) simplices, comparing Betti number estimates to classical TDA tools (e.g., GUDHI). \n", + "3. **Algorithm Refinement**: \n", + " - Develop automated ansatz design tools (e.g., genetic algorithms) and noise-adaptive optimizers. \n", + "\n", + "---\n", + "\n", + "## Conclusion \n", + "By replacing QPE with VQE/VQD, we create a **NISQ-friendly quantum TDA pipeline** that balances accuracy and practicality. While variational methods introduce approximations, their compatibility with near-term hardware enables real-world applications in finance, biology, and materials science. Future work should focus on optimizing ansatz design and integrating error mitigation to achieve quantum advantage in topological data analysis. \n", + "\n" + ] + }, { "cell_type": "code", "execution_count": null, @@ -624,7 +1215,49 @@ "metadata": {}, "outputs": [], "source": [ - "# write your code here" + "# write your code here\n", + "\n", + "!pip install qiskit-terra # Install qiskit-terra\n", + "!pip install qiskit[visualization] # Install visualization tools, if you need them\n", + "\n", + "# Restart runtime here (Runtime -> Restart runtime)\n", + "\n", + "import qiskit.algorithms\n", + "print(qiskit.algorithms.__version__) # Check version. Should be 0.24.0 or higher\n", + "\n", + "from qiskit.algorithms import VQE\n", + "from qiskit.algorithms.optimizers import COBYLA\n", + "from qiskit.circuit.library import TwoLocal\n", + "from qiskit.opflow import PauliSumOp # Or SparsePauliOp\n", + "from qiskit.quantum_info import SparsePauliOp\n", + "from qiskit.primitives import Sampler\n", + "from qiskit import Aer\n", + "\n", + "# ... 
+ "\n",
+ "# ... (rest of the VQE code as before, with the placeholder Laplacian below)\n",
+ "\n",
+ "# Example Laplacian (replace with your actual Laplacian):\n",
+ "pauli_list = [\n",
+ "    (\"ZZII\", 0.25),\n",
+ "    (\"IIZZ\", 0.25),\n",
+ "    (\"ZIZI\", -0.1),\n",
+ "    (\"IXZX\", -0.1),\n",
+ "]\n",
+ "pauli_laplacian = SparsePauliOp.from_list(pauli_list)\n",
+ "\n",
+ "\n",
+ "num_qubits = 4\n",
+ "ansatz = TwoLocal(num_qubits=num_qubits, rotation_blocks=[\"rx\", \"rz\"], entanglement_blocks=\"cz\", reps=2)\n",
+ "optimizer = COBYLA(maxiter=100)\n",
+ "\n",
+ "estimator = Estimator()\n",
+ "\n",
+ "vqe = VQE(estimator=estimator, ansatz=ansatz, optimizer=optimizer)\n",
+ "\n",
+ "result = vqe.compute_minimum_eigenvalue(pauli_laplacian)\n",
+ "\n",
+ "print(\"Estimated smallest eigenvalue:\", result.eigenvalue)\n",
+ "print(\"Optimal parameters:\", result.optimal_parameters)"
 ]
 },
 {
@@ -638,9 +1271,9 @@
 ],
 "metadata": {
 "kernelspec": {
- "display_name": "tda",
+ "display_name": "Python 3 [moodys]",
 "language": "python",
- "name": "python3"
+ "name": "python3_moodys_yhl12u"
 },
 "language_info": {
 "codemirror_mode": {
@@ -652,7 +1285,7 @@
 "name": "python",
 "nbconvert_exporter": "python",
 "pygments_lexer": "ipython3",
- "version": "3.11.11"
+ "version": "3.11.9"
 }
 },
 "nbformat": 4,