diff --git a/constant_model_loss_transformations/loss_transformations.qmd b/constant_model_loss_transformations/loss_transformations.qmd index b9170f76..ac921167 100644 --- a/constant_model_loss_transformations/loss_transformations.qmd +++ b/constant_model_loss_transformations/loss_transformations.qmd @@ -21,7 +21,7 @@ jupyter: format_version: '1.0' jupytext_version: 1.16.1 kernelspec: - display_name: Python 3 (ipykernel) + display_name: ds100env language: python name: python3 --- @@ -116,7 +116,7 @@ $$ \begin{align} 0 &= {\frac{-2}{n}}\sum^{n}_{i=1} (y_i - \hat{\theta_0}) \\ &= \sum^{n}_{i=1} (y_i - \hat{\theta_0}) \quad \quad \text{divide both sides by} \frac{-2}{n} -\\ &= \left(\sum^{n}_{i=1} y_i\right) - \left(\sum^{n}_{i=1} \theta_0\right) \quad \quad \text{separate sums} +\\ &= \left(\sum^{n}_{i=1} y_i\right) - \left(\sum^{n}_{i=1} \hat{\theta_0}\right) \quad \quad \text{separate sums} \\ &= \left(\sum^{n}_{i=1} y_i\right) - (n \cdot \hat{\theta_0}) \quad \quad \text{c + c + … + c = nc} \\ n \cdot \hat{\theta_0} &= \sum^{n}_{i=1} y_i \\ \hat{\theta_0} &= \frac{1}{n} \sum^{n}_{i=1} y_i @@ -159,7 +159,6 @@ The code for generating the graphs and models is included below, but we won't go ```{python} #| code-fold: true -#| vscode: {languageId: python} import numpy as np import pandas as pd import matplotlib.pyplot as plt @@ -174,7 +173,6 @@ data_linear = dugongs[["Length", "Age"]] ```{python} #| code-fold: true -#| vscode: {languageId: python} # Big font helper def adjust_fontsize(size=None): SMALL_SIZE = 8 @@ -196,7 +194,6 @@ plt.style.use("default") # Revert style to default mpl ```{python} #| code-fold: true -#| vscode: {languageId: python} # Constant Model + MSE plt.style.use('default') # Revert style to default mpl adjust_fontsize(size=16) @@ -222,7 +219,6 @@ plt.legend(); ```{python} #| code-fold: true -#| vscode: {languageId: python} # SLR + MSE def mse_linear(theta_0, theta_1, data_linear): data_x, data_y = data_linear.iloc[:, 0], data_linear.iloc[:, 1] @@ -278,7 +274,6 @@ ax.set_zlabel("MSE"); ```{python} #| code-fold: true -#| vscode: {languageId: python} # Predictions yobs = data_linear["Age"] # The true observations y xs = data_linear["Length"] # Needed for linear predictions @@ -290,7 +285,6 @@ yhats_linear = [theta_0_hat + theta_1_hat * x for x in xs] ```{python} #| code-fold: true -#| vscode: {languageId: python} # Constant Model Rug Plot # In case we're in a weird style state sns.set_theme() @@ -307,7 +301,6 @@ plt.yticks([]); ```{python} #| code-fold: true -#| vscode: {languageId: python} # SLR model scatter plot # In case we're in a weird style state sns.set_theme() @@ -421,7 +414,6 @@ Let's consider a dataset where each entry represents the number of drinks sold a ```{python} #| code-fold: false -#| vscode: {languageId: python} drinks = np.array([20, 21, 22, 29, 33]) drinks ``` @@ -430,7 +422,6 @@ From our derivations above, we know that the optimal model parameter under MSE c ```{python} #| code-fold: false -#| vscode: {languageId: python} np.mean(drinks), np.median(drinks) ``` @@ -444,7 +435,6 @@ How do outliers affect each cost function? Imagine we replace the largest value ```{python} #| code-fold: false -#| vscode: {languageId: python} drinks_with_outlier = np.append(drinks, 1033) display(drinks_with_outlier) np.mean(drinks_with_outlier), np.median(drinks_with_outlier) @@ -458,7 +448,6 @@ Let's try another experiment. 
This time, we'll add an additional, non-outlying d ```{python} #| code-fold: false -#| vscode: {languageId: python} drinks_with_additional_observation = np.append(drinks, 35) drinks_with_additional_observation ``` @@ -502,7 +491,6 @@ Let's revisit our dugongs example. The lengths and ages are plotted below: ```{python} #| code-fold: true -#| vscode: {languageId: python} # `corrcoef` computes the correlation coefficient between two variables # `std` finds the standard deviation x = dugongs["Length"] @@ -530,7 +518,6 @@ An important word on $\log$: in Data 100 (and most upper-division STEM courses), ```{python} #| code-fold: true -#| vscode: {languageId: python} z = np.log(y) r = np.corrcoef(x, z)[0, 1] @@ -568,7 +555,6 @@ $y$ is an *exponential* function of $x$. Applying an exponential fit to the untr ```{python} #| code-fold: true -#| vscode: {languageId: python} plt.figure(dpi=120, figsize=(4, 3)) plt.scatter(x, y) diff --git a/docs/constant_model_loss_transformations/loss_transformations.html b/docs/constant_model_loss_transformations/loss_transformations.html index 18bb4607..6e00683b 100644 --- a/docs/constant_model_loss_transformations/loss_transformations.html +++ b/docs/constant_model_loss_transformations/loss_transformations.html @@ -406,7 +406,7 @@

+
Code
import numpy as np
@@ -492,7 +492,7 @@ 

data_linear = dugongs[["Length", "Age"]]

-
+
Code
# Big font helper
@@ -514,7 +514,7 @@ 

plt.style.use("default") # Revert style to default mpl

-
+
Code
# Constant Model + MSE
@@ -547,7 +547,7 @@ 

+
Code
# SLR + MSE
@@ -610,7 +610,7 @@ 

+
Code
# Predictions
@@ -622,7 +622,7 @@ 

yhats_linear = [theta_0_hat + theta_1_hat * x for x in xs]

-
+
Code
# Constant Model Rug Plot
@@ -652,7 +652,7 @@ 

+
Code
# SLR model scatter plot 
@@ -766,7 +766,7 @@ 

11.4 Comparing Loss Functions

We’ve now tried our hand at fitting a model under both MSE and MAE cost functions. How do the two results compare?

Let’s consider a dataset where each entry represents the number of drinks sold at a bubble tea store each day. We’ll fit a constant model to predict the number of drinks that will be sold tomorrow.

-
+
drinks = np.array([20, 21, 22, 29, 33])
 drinks
@@ -774,7 +774,7 @@

+
np.mean(drinks), np.median(drinks)
(np.float64(25.0), np.float64(22.0))
@@ -784,7 +784,7 @@

Notice that the MSE above is a smooth function – it is differentiable at all points, making it easy to minimize using numerical methods. The MAE, in contrast, is not differentiable at each of its “kinks.” We’ll explore how the smoothness of the cost function can impact our ability to apply numerical optimization in a few weeks.
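The risk curves referenced above can be traced directly. The sketch below is not the original plotting code for that figure; it assumes NumPy and Matplotlib are available, and the variable names (`thetas`, `mse`, `mae`) are illustrative.

```python
import numpy as np
import matplotlib.pyplot as plt

drinks = np.array([20, 21, 22, 29, 33])
thetas = np.linspace(15, 40, 500)          # grid of candidate constant predictions

# Broadcast residuals: rows are observations, columns are candidate thetas
residuals = drinks[:, None] - thetas
mse = np.mean(residuals**2, axis=0)        # smooth parabola, minimized at the mean
mae = np.mean(np.abs(residuals), axis=0)   # piecewise linear, with kinks at each y_i

# Note: the two curves live on very different scales; this only shows their shapes
plt.plot(thetas, mse, label="MSE")
plt.plot(thetas, mae, label="MAE")
plt.axvline(np.mean(drinks), color="gray", ls="--", label="mean (MSE minimizer)")
plt.axvline(np.median(drinks), color="gray", ls=":", label="median (MAE minimizer)")
plt.xlabel(r"$\theta$")
plt.ylabel("Average loss")
plt.legend();
```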

How do outliers affect each cost function? Imagine we add a single large outlying value, 1033, to the dataset. The mean of the data increases substantially, while the median is nearly unaffected.

-
+
drinks_with_outlier = np.append(drinks, 1033)
 display(drinks_with_outlier)
 np.mean(drinks_with_outlier), np.median(drinks_with_outlier)
@@ -798,7 +798,7 @@

This means that under the MSE, the optimal model parameter \(\hat{\theta}\) is strongly affected by the presence of outliers. Under the MAE, the optimal parameter is not as influenced by outlying data. We can generalize this by saying that the MSE is sensitive to outliers, while the MAE is robust to outliers.

Let’s try another experiment. This time, we’ll add an additional, non-outlying datapoint to the data.

-
+
drinks_with_additional_observation = np.append(drinks, 35)
 drinks_with_additional_observation
@@ -870,7 +870,7 @@

+
Code
# `corrcoef` computes the correlation coefficient between two variables
@@ -902,7 +902,7 @@ 

and "Length". What is making the raw data deviate from a linear relationship? Notice that the data points with "Length" greater than 2.6 have disproportionately high values of "Age" relative to the rest of the data. If we could manipulate these data points to have lower "Age" values, we’d “shift” these points downwards and reduce the curvature in the data. Applying a logarithmic transformation to \(y_i\) (that is, taking \(\log(\) "Age" \()\) ) would achieve just that.

An important word on \(\log\): in Data 100 (and most upper-division STEM courses), \(\log\) denotes the natural logarithm with base \(e\). The base-10 logarithm, where relevant, is indicated by \(\log_{10}\).
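NumPy follows the same convention: `np.log` computes the natural logarithm, while `np.log10` computes the base-10 logarithm. A trivial check, assuming only that NumPy is installed:

```python
import numpy as np

print(np.log(np.e))      # 1.0 -> np.log is the natural log (base e)
print(np.log10(1000.0))  # 3.0 -> np.log10 is the base-10 log
# Change of base: log_e(x) = log_10(x) * log_e(10)
print(np.isclose(np.log(100.0), np.log10(100.0) * np.log(10.0)))  # True
```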

-
+
Code
z = np.log(y)
@@ -937,7 +937,7 @@ 

\[\log{(y)} = \theta_0 + \theta_1 x\] \[y = e^{\theta_0 + \theta_1 x}\] \[y = (e^{\theta_0})e^{\theta_1 x}\] \[y = C e^{k x}\]

for some constants \(C = e^{\theta_0}\) and \(k = \theta_1\).
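If `theta_0` and `theta_1` still hold the intercept and slope fitted on the log-transformed ages above (and `x` the lengths), the constants can be read off directly. This is only a sketch under that assumption, mirroring what the exponential-fit cell below does:

```python
# Assumes theta_0, theta_1, and x are still defined from the log-transformed fit above
import numpy as np

C = np.exp(theta_0)   # multiplicative constant, e^{theta_0}
k = theta_1           # exponential rate

y_exponential_fit = C * np.exp(k * x)   # predictions on the original "Age" scale
```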

\(y\) is an exponential function of \(x\). Applying an exponential fit to the untransformed variables corroborates this finding.

-
+
Code
plt.figure(dpi=120, figsize=(4, 3))
@@ -1482,7 +1482,7 @@ 
format_version: '1.0' jupytext_version: 1.16.1 kernelspec: - display_name: Python 3 (ipykernel) + display_name: ds100env language: python name: python3 --- @@ -1577,7 +1577,7 @@
\begin{align} 0 &= {\frac{-2}{n}}\sum^{n}_{i=1} (y_i - \hat{\theta_0}) \\ &= \sum^{n}_{i=1} (y_i - \hat{\theta_0}) \quad \quad \text{divide both sides by} \frac{-2}{n} -\\ &= \left(\sum^{n}_{i=1} y_i\right) - \left(\sum^{n}_{i=1} \theta_0\right) \quad \quad \text{separate sums} +\\ &= \left(\sum^{n}_{i=1} y_i\right) - \left(\sum^{n}_{i=1} \hat{\theta_0}\right) \quad \quad \text{separate sums} \\ &= \left(\sum^{n}_{i=1} y_i\right) - (n \cdot \hat{\theta_0}) \quad \quad \text{c + c + … + c = nc} \\ n \cdot \hat{\theta_0} &= \sum^{n}_{i=1} y_i \\ \hat{\theta_0} &= \frac{1}{n} \sum^{n}_{i=1} y_i @@ -1620,484 +1620,470 @@
```{python} #| code-fold: true -#| vscode: {languageId: python} -import numpy as np -import pandas as pd -import matplotlib.pyplot as plt -%matplotlib inline -import seaborn as sns -import itertools -from mpl_toolkits.mplot3d import Axes3D -dugongs = pd.read_csv("data/dugongs.csv") -data_constant = dugongs["Age"] -data_linear = dugongs[["Length", "Age"]] -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# Big font helper -def adjust_fontsize(size=None): - SMALL_SIZE = 8 - MEDIUM_SIZE = 10 - BIGGER_SIZE = 12 - if size != None: - SMALL_SIZE = MEDIUM_SIZE = BIGGER_SIZE = size - - plt.rc("font", size=SMALL_SIZE) # controls default text sizes - plt.rc("axes", titlesize=SMALL_SIZE) # fontsize of the axes title - plt.rc("axes", labelsize=MEDIUM_SIZE) # fontsize of the x and y labels - plt.rc("xtick", labelsize=SMALL_SIZE) # fontsize of the tick labels - plt.rc("ytick", labelsize=SMALL_SIZE) # fontsize of the tick labels - plt.rc("legend", fontsize=SMALL_SIZE) # legend fontsize - plt.rc("figure", titlesize=BIGGER_SIZE) # fontsize of the figure title - -plt.style.use("default") # Revert style to default mpl -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# Constant Model + MSE -plt.style.use('default') # Revert style to default mpl -adjust_fontsize(size=16) -%matplotlib inline +import numpy as np +import pandas as pd +import matplotlib.pyplot as plt +%matplotlib inline +import seaborn as sns +import itertools +from mpl_toolkits.mplot3d import Axes3D +dugongs = pd.read_csv("data/dugongs.csv") +data_constant = dugongs["Age"] +data_linear = dugongs[["Length", "Age"]] +``` + +```{python} +#| code-fold: true +# Big font helper +def adjust_fontsize(size=None): + SMALL_SIZE = 8 + MEDIUM_SIZE = 10 + BIGGER_SIZE = 12 + if size != None: + SMALL_SIZE = MEDIUM_SIZE = BIGGER_SIZE = size + + plt.rc("font", size=SMALL_SIZE) # controls default text sizes + plt.rc("axes", titlesize=SMALL_SIZE) # fontsize of the axes title + plt.rc("axes", labelsize=MEDIUM_SIZE) # fontsize of the x and y labels + plt.rc("xtick", labelsize=SMALL_SIZE) # fontsize of the tick labels + plt.rc("ytick", labelsize=SMALL_SIZE) # fontsize of the tick labels + plt.rc("legend", fontsize=SMALL_SIZE) # legend fontsize + plt.rc("figure", titlesize=BIGGER_SIZE) # fontsize of the figure title + +plt.style.use("default") # Revert style to default mpl +``` + +```{python} +#| code-fold: true +# Constant Model + MSE +plt.style.use('default') # Revert style to default mpl +adjust_fontsize(size=16) +%matplotlib inline + +def mse_constant(theta, data): + return np.mean(np.array([(y_obs - theta) ** 2 for y_obs in data]), axis=0) -def mse_constant(theta, data): - return np.mean(np.array([(y_obs - theta) ** 2 for y_obs in data]), axis=0) +thetas = np.linspace(-20, 42, 1000) +l2_loss_thetas = mse_constant(thetas, data_constant) -thetas = np.linspace(-20, 42, 1000) -l2_loss_thetas = mse_constant(thetas, data_constant) - -# Plotting the loss surface -plt.plot(thetas, l2_loss_thetas) -plt.xlabel(r'$\theta_0$') -plt.ylabel(r'MSE') - -# Optimal point -thetahat = np.mean(data_constant) -plt.scatter([thetahat], [mse_constant(thetahat, data_constant)], s=50, label = r"$\hat{\theta}_0$") -plt.legend(); -# plt.show() -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# SLR + MSE -def mse_linear(theta_0, theta_1, data_linear): - data_x, data_y = data_linear.iloc[:, 0], data_linear.iloc[:, 1] - return np.mean( - np.array([(y - (theta_0 + theta_1 * x)) ** 2 for x, y in zip(data_x, data_y)]), - 
axis=0, - ) - - -# plotting the loss surface -theta_0_values = np.linspace(-80, 20, 80) -theta_1_values = np.linspace(-10, 30, 80) -mse_values = np.array( - [[mse_linear(x, y, data_linear) for x in theta_0_values] for y in theta_1_values] -) - -# Optimal point -data_x, data_y = data_linear.iloc[:, 0], data_linear.iloc[:, 1] -theta_1_hat = np.corrcoef(data_x, data_y)[0, 1] * np.std(data_y) / np.std(data_x) -theta_0_hat = np.mean(data_y) - theta_1_hat * np.mean(data_x) +# Plotting the loss surface +plt.plot(thetas, l2_loss_thetas) +plt.xlabel(r'$\theta_0$') +plt.ylabel(r'MSE') + +# Optimal point +thetahat = np.mean(data_constant) +plt.scatter([thetahat], [mse_constant(thetahat, data_constant)], s=50, label = r"$\hat{\theta}_0$") +plt.legend(); +# plt.show() +``` + +```{python} +#| code-fold: true +# SLR + MSE +def mse_linear(theta_0, theta_1, data_linear): + data_x, data_y = data_linear.iloc[:, 0], data_linear.iloc[:, 1] + return np.mean( + np.array([(y - (theta_0 + theta_1 * x)) ** 2 for x, y in zip(data_x, data_y)]), + axis=0, + ) + + +# plotting the loss surface +theta_0_values = np.linspace(-80, 20, 80) +theta_1_values = np.linspace(-10, 30, 80) +mse_values = np.array( + [[mse_linear(x, y, data_linear) for x in theta_0_values] for y in theta_1_values] +) + +# Optimal point +data_x, data_y = data_linear.iloc[:, 0], data_linear.iloc[:, 1] +theta_1_hat = np.corrcoef(data_x, data_y)[0, 1] * np.std(data_y) / np.std(data_x) +theta_0_hat = np.mean(data_y) - theta_1_hat * np.mean(data_x) + +# Create the 3D plot +fig = plt.figure(figsize=(7, 5)) +ax = fig.add_subplot(111, projection="3d") -# Create the 3D plot -fig = plt.figure(figsize=(7, 5)) -ax = fig.add_subplot(111, projection="3d") - -X, Y = np.meshgrid(theta_0_values, theta_1_values) -surf = ax.plot_surface( - X, Y, mse_values, cmap="viridis", alpha=0.6 -) # Use alpha to make it slightly transparent - -# Scatter point using matplotlib -sc = ax.scatter( - [theta_0_hat], - [theta_1_hat], - [mse_linear(theta_0_hat, theta_1_hat, data_linear)], - marker="o", - color="red", - s=100, - label="theta hat", -) +X, Y = np.meshgrid(theta_0_values, theta_1_values) +surf = ax.plot_surface( + X, Y, mse_values, cmap="viridis", alpha=0.6 +) # Use alpha to make it slightly transparent + +# Scatter point using matplotlib +sc = ax.scatter( + [theta_0_hat], + [theta_1_hat], + [mse_linear(theta_0_hat, theta_1_hat, data_linear)], + marker="o", + color="red", + s=100, + label="theta hat", +) + +# Create a colorbar +cbar = fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10) +cbar.set_label("Cost Value") -# Create a colorbar -cbar = fig.colorbar(surf, ax=ax, shrink=0.5, aspect=10) -cbar.set_label("Cost Value") - -ax.set_title("MSE for different $\\theta_0, \\theta_1$") -ax.set_xlabel("$\\theta_0$") -ax.set_ylabel("$\\theta_1$") -ax.set_zlabel("MSE"); - -# plt.show() -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# Predictions -yobs = data_linear["Age"] # The true observations y -xs = data_linear["Length"] # Needed for linear predictions -n = len(yobs) # Predictions - -yhats_constant = [thetahat for i in range(n)] # Not used, but food for thought -yhats_linear = [theta_0_hat + theta_1_hat * x for x in xs] -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# Constant Model Rug Plot -# In case we're in a weird style state -sns.set_theme() -adjust_fontsize(size=16) -%matplotlib inline - -fig = plt.figure(figsize=(8, 1.5)) -sns.rugplot(yobs, height=0.25, lw=2) ; -plt.axvline(thetahat, color='red', lw=4, 
label=r"$\hat{\theta}_0$"); -plt.legend() -plt.yticks([]); -# plt.show() -``` - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# SLR model scatter plot -# In case we're in a weird style state -sns.set_theme() -adjust_fontsize(size=16) -%matplotlib inline - -sns.scatterplot(x=xs, y=yobs) -plt.plot(xs, yhats_linear, color='red', lw=4); -# plt.savefig('dugong_line.png', bbox_inches = 'tight'); -# plt.show() -``` +ax.set_title("MSE for different $\\theta_0, \\theta_1$") +ax.set_xlabel("$\\theta_0$") +ax.set_ylabel("$\\theta_1$") +ax.set_zlabel("MSE"); + +# plt.show() +``` + +```{python} +#| code-fold: true +# Predictions +yobs = data_linear["Age"] # The true observations y +xs = data_linear["Length"] # Needed for linear predictions +n = len(yobs) # Predictions + +yhats_constant = [thetahat for i in range(n)] # Not used, but food for thought +yhats_linear = [theta_0_hat + theta_1_hat * x for x in xs] +``` + +```{python} +#| code-fold: true +# Constant Model Rug Plot +# In case we're in a weird style state +sns.set_theme() +adjust_fontsize(size=16) +%matplotlib inline + +fig = plt.figure(figsize=(8, 1.5)) +sns.rugplot(yobs, height=0.25, lw=2) ; +plt.axvline(thetahat, color='red', lw=4, label=r"$\hat{\theta}_0$"); +plt.legend() +plt.yticks([]); +# plt.show() +``` + +```{python} +#| code-fold: true +# SLR model scatter plot +# In case we're in a weird style state +sns.set_theme() +adjust_fontsize(size=16) +%matplotlib inline + +sns.scatterplot(x=xs, y=yobs) +plt.plot(xs, yhats_linear, color='red', lw=4); +# plt.savefig('dugong_line.png', bbox_inches = 'tight'); +# plt.show() +``` + +Interpreting the RMSE (Root Mean Squared Error): + +- Because the constant error is **HIGHER** than the linear error, +- The constant model is **WORSE** than the linear model (at least for this metric). + +## Constant Model + MAE -Interpreting the RMSE (Root Mean Squared Error): +We see now that changing the model used for prediction leads to a wildly different result for the optimal model parameter. What happens if we instead change the loss function used in model evaluation? -- Because the constant error is **HIGHER** than the linear error, -- The constant model is **WORSE** than the linear model (at least for this metric). - -## Constant Model + MAE - -We see now that changing the model used for prediction leads to a wildly different result for the optimal model parameter. What happens if we instead change the loss function used in model evaluation? +This time, we will consider the constant model with L1 (absolute loss) as the loss function. This means that the average loss will be expressed as the **Mean Absolute Error (MAE)**. + +1. Choose a model: constant model +2. Choose a loss function: L1 loss +3. Fit the model +4. Evaluate model performance -This time, we will consider the constant model with L1 (absolute loss) as the loss function. This means that the average loss will be expressed as the **Mean Absolute Error (MAE)**. +### Deriving the optimal $\theta_0$ -1. Choose a model: constant model -2. Choose a loss function: L1 loss -3. Fit the model -4. Evaluate model performance - -### Deriving the optimal $\theta_0$ - -Recall that the MAE is average **absolute** loss (L1 loss) over the data $D = \{y_1, y_2, ..., y_n\}$. 
- -$$\hat{R}(\theta_0) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \hat{y_i}| $$ - -Given the constant model $\hat{y} = \theta_0$, we can write the MAE as: - -$$\hat{R}(\theta_0) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0| $$ - -To fit the model, we find the optimal parameter value $\hat{\theta_0}$ that minimizes the MAE by differentiating using a calculus approach: - -1. Differentiate with respect to $\hat{\theta_0}$: - -$$ -\begin{align} -\hat{R}(\theta_0) &= \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0| \\ -\frac{d}{d\theta_0} R(\theta_0) &= \frac{d}{d\theta_0} \left(\frac{1}{n} \sum^{n}_{i=1} |y_i - \theta_0| \right) \\ -&= \frac{1}{n} \sum^{n}_{i=1} \frac{d}{d\theta_0} |y_i - \theta_0| -\end{align} -$$ +Recall that the MAE is average **absolute** loss (L1 loss) over the data $D = \{y_1, y_2, ..., y_n\}$. + +$$\hat{R}(\theta_0) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \hat{y_i}| $$ + +Given the constant model $\hat{y} = \theta_0$, we can write the MAE as: + +$$\hat{R}(\theta_0) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0| $$ + +To fit the model, we find the optimal parameter value $\hat{\theta_0}$ that minimizes the MAE by differentiating using a calculus approach: + +1. Differentiate with respect to $\hat{\theta_0}$: + +$$ +\begin{align} +\hat{R}(\theta_0) &= \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0| \\ +\frac{d}{d\theta_0} R(\theta_0) &= \frac{d}{d\theta_0} \left(\frac{1}{n} \sum^{n}_{i=1} |y_i - \theta_0| \right) \\ +&= \frac{1}{n} \sum^{n}_{i=1} \frac{d}{d\theta_0} |y_i - \theta_0| +\end{align} +$$ + +- Here, we seem to have run into a problem: the derivative of an absolute value is undefined when the argument is 0 (i.e. when $y_i = \theta_0$). For now, we'll ignore this issue. It turns out that disregarding this case doesn't influence our final result. +- To perform the derivative, consider two cases. When $\theta_0$ is *less than or equal to* $y_i$, the term $y_i - \theta_0$ will be positive and the absolute value has no impact. When $\theta_0$ is *greater than* $y_i$, the term $y_i - \theta_0$ will be negative. Applying the absolute value will convert this to a positive value, which we can express by saying $-(y_i - \theta_0) = \theta_0 - y_i$. + +$$|y_i - \theta_0| = \begin{cases} y_i - \theta_0 \quad \text{ if } \theta_0 \le y_i \\ \theta_0 - y_i \quad \text{if }\theta_0 > y_i \end{cases}$$ + +- Taking derivatives: -- Here, we seem to have run into a problem: the derivative of an absolute value is undefined when the argument is 0 (i.e. when $y_i = \theta_0$). For now, we'll ignore this issue. It turns out that disregarding this case doesn't influence our final result. -- To perform the derivative, consider two cases. When $\theta_0$ is *less than or equal to* $y_i$, the term $y_i - \theta_0$ will be positive and the absolute value has no impact. When $\theta_0$ is *greater than* $y_i$, the term $y_i - \theta_0$ will be negative. Applying the absolute value will convert this to a positive value, which we can express by saying $-(y_i - \theta_0) = \theta_0 - y_i$. 
- -$$|y_i - \theta_0| = \begin{cases} y_i - \theta_0 \quad \text{ if } \theta_0 \le y_i \\ \theta_0 - y_i \quad \text{if }\theta_0 > y_i \end{cases}$$ - -- Taking derivatives: - -$$\frac{d}{d\theta_0} |y_i - \theta_0| = \begin{cases} \frac{d}{d\theta_0} (y_i - \theta_0) = -1 \quad \text{if }\theta_0 < y_i \\ \frac{d}{d\theta_0} (\theta_0 - y_i) = 1 \quad \text{if }\theta_0 > y_i \end{cases}$$ +$$\frac{d}{d\theta_0} |y_i - \theta_0| = \begin{cases} \frac{d}{d\theta_0} (y_i - \theta_0) = -1 \quad \text{if }\theta_0 < y_i \\ \frac{d}{d\theta_0} (\theta_0 - y_i) = 1 \quad \text{if }\theta_0 > y_i \end{cases}$$ + +- This means that we obtain a different value for the derivative for data points where $\theta_0 < y_i$ and where $\theta_0 > y_i$. We can summarize this by saying: + +$$ +\frac{d}{d\theta_0} R(\theta_0) = \frac{1}{n} \sum^{n}_{i=1} \frac{d}{d\theta_0} |y_i - \theta_0| \\ += \frac{1}{n} \left[\sum_{\theta_0 < y_i} (-1) + \sum_{\theta_0 > y_i} (+1) \right] +$$ -- This means that we obtain a different value for the derivative for data points where $\theta_0 < y_i$ and where $\theta_0 > y_i$. We can summarize this by saying: - -$$ -\frac{d}{d\theta_0} R(\theta_0) = \frac{1}{n} \sum^{n}_{i=1} \frac{d}{d\theta_0} |y_i - \theta_0| \\ -= \frac{1}{n} \left[\sum_{\theta_0 < y_i} (-1) + \sum_{\theta_0 > y_i} (+1) \right] -$$ +- In other words, we take the sum of values for $i = 1, 2, ..., n$: + - $-1$ if our observation $y_i$ is *greater than* our prediction $\hat{\theta_0}$ + - $+1$ if our observation $y_i$ is *smaller than* our prediction $\hat{\theta_0}$ + +2. Set the derivative equation equal to 0: + $$ 0 = \frac{1}{n}\sum_{\hat{\theta_0} < y_i} (-1) + \frac{1}{n}\sum_{\hat{\theta_0} > y_i} (+1) $$ -- In other words, we take the sum of values for $i = 1, 2, ..., n$: - - $-1$ if our observation $y_i$ is *greater than* our prediction $\hat{\theta_0}$ - - $+1$ if our observation $y_i$ is *smaller than* our prediction $\hat{\theta_0}$ - -2. Set the derivative equation equal to 0: - $$ 0 = \frac{1}{n}\sum_{\hat{\theta_0} < y_i} (-1) + \frac{1}{n}\sum_{\hat{\theta_0} > y_i} (+1) $$ +3. Solve for $\hat{\theta_0}$: + $$ 0 = -\frac{1}{n}\sum_{\hat{\theta_0} < y_i} (1) + \frac{1}{n}\sum_{\hat{\theta_0} > y_i} (1)$$ + +$$\sum_{\hat{\theta_0} < y_i} (1) = \sum_{\hat{\theta_0} > y_i} (1) $$ + +Thus, the constant model parameter $\theta = \hat{\theta_0}$ that minimizes MAE must satisfy: -3. Solve for $\hat{\theta_0}$: - $$ 0 = -\frac{1}{n}\sum_{\hat{\theta_0} < y_i} (1) + \frac{1}{n}\sum_{\hat{\theta_0} > y_i} (1)$$ - -$$\sum_{\hat{\theta_0} < y_i} (1) = \sum_{\hat{\theta_0} > y_i} (1) $$ +$$ \sum_{\hat{\theta_0} < y_i} (1) = \sum_{\hat{\theta_0} > y_i} (1) $$ + +In other words, the number of observations greater than $\theta_0$ must be equal to the number of observations less than $\theta_0$; there must be an equal number of points on the left and right sides of the equation. This is the definition of median, so our optimal value is +$$ \hat{\theta}_0 = median(y) $$ -Thus, the constant model parameter $\theta = \hat{\theta_0}$ that minimizes MAE must satisfy: +## Summary: Loss Optimization, Calculus, and Critical Points -$$ \sum_{\hat{\theta_0} < y_i} (1) = \sum_{\hat{\theta_0} > y_i} (1) $$ +First, define the **objective function** as average loss. -In other words, the number of observations greater than $\theta_0$ must be equal to the number of observations less than $\theta_0$; there must be an equal number of points on the left and right sides of the equation. 
This is the definition of median, so our optimal value is -$$ \hat{\theta}_0 = median(y) $$ +- Plug in L1 or L2 loss. +- Plug in the model so that the resulting expression is a function of $\theta$. -## Summary: Loss Optimization, Calculus, and Critical Points +Then, find the minimum of the objective function: -First, define the **objective function** as average loss. - -- Plug in L1 or L2 loss. -- Plug in the model so that the resulting expression is a function of $\theta$. +1. Differentiate with respect to $\theta$. +2. Set equal to 0. +3. Solve for $\hat{\theta}$. +4. (If we have multiple parameters) repeat steps 1-3 with partial derivatives. -Then, find the minimum of the objective function: +Recall critical points from calculus: $R(\hat{\theta})$ could be a minimum, maximum, or saddle point! -1. Differentiate with respect to $\theta$. -2. Set equal to 0. -3. Solve for $\hat{\theta}$. -4. (If we have multiple parameters) repeat steps 1-3 with partial derivatives. - -Recall critical points from calculus: $R(\hat{\theta})$ could be a minimum, maximum, or saddle point! - -- We should technically also perform the second derivative test, i.e., show $R''(\hat{\theta}) > 0$. -- MSE has a property—**convexity**—that guarantees that $R(\hat{\theta})$ is a global minimum. -- The proof of convexity for MAE is beyond this course. - -## Comparing Loss Functions - -We've now tried our hand at fitting a model under both MSE and MAE cost functions. How do the two results compare? - -Let's consider a dataset where each entry represents the number of drinks sold at a bubble tea store each day. We'll fit a constant model to predict the number of drinks that will be sold tomorrow. - -```{python} -#| code-fold: false -#| vscode: {languageId: python} -drinks = np.array([20, 21, 22, 29, 33]) -drinks -``` - -From our derivations above, we know that the optimal model parameter under MSE cost is the mean of the dataset. Under MAE cost, the optimal parameter is the median of the dataset. - -```{python} -#| code-fold: false -#| vscode: {languageId: python} -np.mean(drinks), np.median(drinks) -``` - -If we plot each empirical risk function across several possible values of $\theta$, we find that each $\hat{\theta}$ does indeed correspond to the lowest value of error: - -<img src="images/error.png" alt='error' width='600'> - -Notice that the MSE above is a **smooth** function – it is differentiable at all points, making it easy to minimize using numerical methods. The MAE, in contrast, is not differentiable at each of its "kinks." We'll explore how the smoothness of the cost function can impact our ability to apply numerical optimization in a few weeks. - -How do outliers affect each cost function? Imagine we replace the largest value in the dataset with 1000. The mean of the data increases substantially, while the median is nearly unaffected. +- We should technically also perform the second derivative test, i.e., show $R''(\hat{\theta}) > 0$. +- MSE has a property—**convexity**—that guarantees that $R(\hat{\theta})$ is a global minimum. +- The proof of convexity for MAE is beyond this course. + +## Comparing Loss Functions + +We've now tried our hand at fitting a model under both MSE and MAE cost functions. How do the two results compare? + +Let's consider a dataset where each entry represents the number of drinks sold at a bubble tea store each day. We'll fit a constant model to predict the number of drinks that will be sold tomorrow. 
+ +```{python} +#| code-fold: false +drinks = np.array([20, 21, 22, 29, 33]) +drinks +``` + +From our derivations above, we know that the optimal model parameter under MSE cost is the mean of the dataset. Under MAE cost, the optimal parameter is the median of the dataset. + +```{python} +#| code-fold: false +np.mean(drinks), np.median(drinks) +``` + +If we plot each empirical risk function across several possible values of $\theta$, we find that each $\hat{\theta}$ does indeed correspond to the lowest value of error: + +<img src="images/error.png" alt='error' width='600'> + +Notice that the MSE above is a **smooth** function – it is differentiable at all points, making it easy to minimize using numerical methods. The MAE, in contrast, is not differentiable at each of its "kinks." We'll explore how the smoothness of the cost function can impact our ability to apply numerical optimization in a few weeks. + +How do outliers affect each cost function? Imagine we replace the largest value in the dataset with 1000. The mean of the data increases substantially, while the median is nearly unaffected. + +```{python} +#| code-fold: false +drinks_with_outlier = np.append(drinks, 1033) +display(drinks_with_outlier) +np.mean(drinks_with_outlier), np.median(drinks_with_outlier) +``` -```{python} -#| code-fold: false -#| vscode: {languageId: python} -drinks_with_outlier = np.append(drinks, 1033) -display(drinks_with_outlier) -np.mean(drinks_with_outlier), np.median(drinks_with_outlier) -``` - -<img src="images/outliers.png" alt='outliers' width='700'> - -This means that under the MSE, the optimal model parameter $\hat{\theta}$ is strongly affected by the presence of outliers. Under the MAE, the optimal parameter is not as influenced by outlying data. We can generalize this by saying that the MSE is **sensitive** to outliers, while the MAE is **robust** to outliers. +<img src="images/outliers.png" alt='outliers' width='700'> + +This means that under the MSE, the optimal model parameter $\hat{\theta}$ is strongly affected by the presence of outliers. Under the MAE, the optimal parameter is not as influenced by outlying data. We can generalize this by saying that the MSE is **sensitive** to outliers, while the MAE is **robust** to outliers. + +Let's try another experiment. This time, we'll add an additional, non-outlying datapoint to the data. + +```{python} +#| code-fold: false +drinks_with_additional_observation = np.append(drinks, 35) +drinks_with_additional_observation +``` -Let's try another experiment. This time, we'll add an additional, non-outlying datapoint to the data. +When we again visualize the cost functions, we find that the MAE now plots a horizontal line between 22 and 29. This means that there are _infinitely_ many optimal values for the model parameter: any value $\hat{\theta} \in [22, 29]$ will minimize the MAE. In contrast, the MSE still has a single best value for $\hat{\theta}$. In other words, the MSE has a **unique** solution for $\hat{\theta}$; the MAE is not guaranteed to have a single unique solution. -```{python} -#| code-fold: false -#| vscode: {languageId: python} -drinks_with_additional_observation = np.append(drinks, 35) -drinks_with_additional_observation -``` - -When we again visualize the cost functions, we find that the MAE now plots a horizontal line between 22 and 29. This means that there are _infinitely_ many optimal values for the model parameter: any value $\hat{\theta} \in [22, 29]$ will minimize the MAE. 
In contrast, the MSE still has a single best value for $\hat{\theta}$. In other words, the MSE has a **unique** solution for $\hat{\theta}$; the MAE is not guaranteed to have a single unique solution. - -<img src="images/mse_loss_26.png" width='350'> <img src="images/mae_loss_infinite.png" width='350'> +<img src="images/mse_loss_26.png" width='350'> <img src="images/mae_loss_infinite.png" width='350'> + +To summarize our example, + +| | MSE (Mean Squared Loss) | MAE (Mean Absolute Loss) | +| ---------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------------------------- | +| Loss Function | $\hat{R}(\theta) = \frac{1}{n}\sum^{n}_{i=1} (y_i - \theta_0)^2$ | $\hat{R}(\theta) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0|$ | +| Optimal $\hat{\theta_0}$ | $\hat{\theta_0} = mean(y) = \bar{y}$ | $\hat{\theta_0} = median(y)$ | +| Loss Surface | <img src="images/mse_loss_26.png" width='250'> | <img src="images/mae_loss_infinite.png" width='250'> | +| Shape | **Smooth** - easy to minimize using numerical methods (in a few weeks) | **Piecewise** - at each of the “kinks,” it’s not differentiable. Harder to minimize. | +| Outliers | **Sensitive** to outliers (since they change mean substantially). Sensitivity also depends on the dataset size. | **More robust** to outliers. | +| $\hat{\theta_0}$ Uniqueness | **Unique** $\hat{\theta_0}$ | **Infinitely many** $\hat{\theta_0}$s | -To summarize our example, +## Transformations to Fit Linear Models -| | MSE (Mean Squared Loss) | MAE (Mean Absolute Loss) | -| ---------------------- | ---------------------------------------------------------------- | ------------------------------------------------------------------------------- | -| Loss Function | $\hat{R}(\theta) = \frac{1}{n}\sum^{n}_{i=1} (y_i - \theta_0)^2$ | $\hat{R}(\theta) = \frac{1}{n}\sum^{n}_{i=1} |y_i - \theta_0|$ | -| Optimal $\hat{\theta_0}$ | $\hat{\theta_0} = mean(y) = \bar{y}$ | $\hat{\theta_0} = median(y)$ | -| Loss Surface | <img src="images/mse_loss_26.png" width='250'> | <img src="images/mae_loss_infinite.png" width='250'> | -| Shape | **Smooth** - easy to minimize using numerical methods (in a few weeks) | **Piecewise** - at each of the “kinks,” it’s not differentiable. Harder to minimize. | -| Outliers | **Sensitive** to outliers (since they change mean substantially). Sensitivity also depends on the dataset size. | **More robust** to outliers. | -| $\hat{\theta_0}$ Uniqueness | **Unique** $\hat{\theta_0}$ | **Infinitely many** $\hat{\theta_0}$s | - -## Transformations to Fit Linear Models - -At this point, we have an effective method of fitting models to predict linear relationships. Given a feature variable and target, we can apply our four-step process to find the optimal model parameters. - -A key word above is _linear_. When we computed parameter estimates earlier, we assumed that $x_i$ and $y_i$ shared a roughly linear relationship. Data in the real world isn't always so straightforward, but we can transform the data to try and obtain linearity. +At this point, we have an effective method of fitting models to predict linear relationships. Given a feature variable and target, we can apply our four-step process to find the optimal model parameters. + +A key word above is _linear_. When we computed parameter estimates earlier, we assumed that $x_i$ and $y_i$ shared a roughly linear relationship. 
Data in the real world isn't always so straightforward, but we can transform the data to try and obtain linearity. + +The **Tukey-Mosteller Bulge Diagram** is a useful tool for summarizing what transformations can linearize the relationship between two variables. To determine what transformations might be appropriate, trace the shape of the "bulge" made by your data. Find the quadrant of the diagram that matches this bulge. The transformations shown on the vertical and horizontal axes of this quadrant can help improve the fit between the variables. + +<img src="images/bulge.png" alt='bulge' width='600'> + +Note that: + +- There are multiple solutions. Some will fit better than others. +- sqrt and log make a value “smaller.” +- Raising to a power makes a value “bigger.” +- Each of these transformations equates to increasing or decreasing the scale of an axis. -The **Tukey-Mosteller Bulge Diagram** is a useful tool for summarizing what transformations can linearize the relationship between two variables. To determine what transformations might be appropriate, trace the shape of the "bulge" made by your data. Find the quadrant of the diagram that matches this bulge. The transformations shown on the vertical and horizontal axes of this quadrant can help improve the fit between the variables. - -<img src="images/bulge.png" alt='bulge' width='600'> - -Note that: - -- There are multiple solutions. Some will fit better than others. -- sqrt and log make a value “smaller.” -- Raising to a power makes a value “bigger.” -- Each of these transformations equates to increasing or decreasing the scale of an axis. - -Other goals in addition to linearity are possible, for example, making data appear more symmetric. -Linearity allows us to fit lines to the transformed data. - -Let's revisit our dugongs example. The lengths and ages are plotted below: - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -# `corrcoef` computes the correlation coefficient between two variables -# `std` finds the standard deviation -x = dugongs["Length"] -y = dugongs["Age"] -r = np.corrcoef(x, y)[0, 1] -theta_1 = r * np.std(y) / np.std(x) -theta_0 = np.mean(y) - theta_1 * np.mean(x) - -fig, ax = plt.subplots(1, 2, dpi=200, figsize=(8, 3)) -ax[0].scatter(x, y) -ax[0].set_xlabel("Length") -ax[0].set_ylabel("Age") +Other goals in addition to linearity are possible, for example, making data appear more symmetric. +Linearity allows us to fit lines to the transformed data. + +Let's revisit our dugongs example. The lengths and ages are plotted below: + +```{python} +#| code-fold: true +# `corrcoef` computes the correlation coefficient between two variables +# `std` finds the standard deviation +x = dugongs["Length"] +y = dugongs["Age"] +r = np.corrcoef(x, y)[0, 1] +theta_1 = r * np.std(y) / np.std(x) +theta_0 = np.mean(y) - theta_1 * np.mean(x) + +fig, ax = plt.subplots(1, 2, dpi=200, figsize=(8, 3)) +ax[0].scatter(x, y) +ax[0].set_xlabel("Length") +ax[0].set_ylabel("Age") + +ax[1].scatter(x, y) +ax[1].plot(x, theta_0 + theta_1 * x, "tab:red") +ax[1].set_xlabel("Length") +ax[1].set_ylabel("Age"); +``` + +Looking at the plot on the left, we see that there is a slight curvature to the data points. Plotting the SLR curve on the right results in a poor fit. + +For SLR to perform well, we'd like there to be a rough linear trend relating `"Age"` and `"Length"`. What is making the raw data deviate from a linear relationship? 
Notice that the data points with `"Length"` greater than 2.6 have disproportionately high values of `"Age"` relative to the rest of the data. If we could manipulate these data points to have lower `"Age"` values, we'd "shift" these points downwards and reduce the curvature in the data. Applying a logarithmic transformation to $y_i$ (that is, taking $\log($ `"Age"` $)$ ) would achieve just that. + +An important word on $\log$: in Data 100 (and most upper-division STEM courses), $\log$ denotes the natural logarithm with base $e$. The base-10 logarithm, where relevant, is indicated by $\log_{10}$. -ax[1].scatter(x, y) -ax[1].plot(x, theta_0 + theta_1 * x, "tab:red") -ax[1].set_xlabel("Length") -ax[1].set_ylabel("Age"); -``` +```{python} +#| code-fold: true +z = np.log(y) -Looking at the plot on the left, we see that there is a slight curvature to the data points. Plotting the SLR curve on the right results in a poor fit. - -For SLR to perform well, we'd like there to be a rough linear trend relating `"Age"` and `"Length"`. What is making the raw data deviate from a linear relationship? Notice that the data points with `"Length"` greater than 2.6 have disproportionately high values of `"Age"` relative to the rest of the data. If we could manipulate these data points to have lower `"Age"` values, we'd "shift" these points downwards and reduce the curvature in the data. Applying a logarithmic transformation to $y_i$ (that is, taking $\log($ `"Age"` $)$ ) would achieve just that. +r = np.corrcoef(x, z)[0, 1] +theta_1 = r * np.std(z) / np.std(x) +theta_0 = np.mean(z) - theta_1 * np.mean(x) -An important word on $\log$: in Data 100 (and most upper-division STEM courses), $\log$ denotes the natural logarithm with base $e$. The base-10 logarithm, where relevant, is indicated by $\log_{10}$. - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -z = np.log(y) - -r = np.corrcoef(x, z)[0, 1] -theta_1 = r * np.std(z) / np.std(x) -theta_0 = np.mean(z) - theta_1 * np.mean(x) +fig, ax = plt.subplots(1, 2, dpi=200, figsize=(8, 3)) +ax[0].scatter(x, z) +ax[0].set_xlabel("Length") +ax[0].set_ylabel(r"$\log{(Age)}$") + +ax[1].scatter(x, z) +ax[1].plot(x, theta_0 + theta_1 * x, "tab:red") +ax[1].set_xlabel("Length") +ax[1].set_ylabel(r"$\log{(Age)}$") + +plt.subplots_adjust(wspace=0.3) +``` -fig, ax = plt.subplots(1, 2, dpi=200, figsize=(8, 3)) -ax[0].scatter(x, z) -ax[0].set_xlabel("Length") -ax[0].set_ylabel(r"$\log{(Age)}$") +Our SLR fit looks a lot better! We now have a new target variable: the SLR model is now trying to predict the _log_ of `"Age"`, rather than the untransformed `"Age"`. In other words, we are applying the transformation $z_i = \log{(y_i)}$. Notice that the resulting model is still **linear in the parameters** $\theta = [\theta_0, \theta_1]$. The SLR model becomes: + +$$\hat{\log{y}} = \theta_0 + \theta_1 x$$ +$$\hat{z} = \theta_0 + \theta_1 x$$ -ax[1].scatter(x, z) -ax[1].plot(x, theta_0 + theta_1 * x, "tab:red") -ax[1].set_xlabel("Length") -ax[1].set_ylabel(r"$\log{(Age)}$") - -plt.subplots_adjust(wspace=0.3) -``` - -Our SLR fit looks a lot better! We now have a new target variable: the SLR model is now trying to predict the _log_ of `"Age"`, rather than the untransformed `"Age"`. In other words, we are applying the transformation $z_i = \log{(y_i)}$. Notice that the resulting model is still **linear in the parameters** $\theta = [\theta_0, \theta_1]$. 
The SLR model becomes: - -$$\hat{\log{y}} = \theta_0 + \theta_1 x$$ -$$\hat{z} = \theta_0 + \theta_1 x$$ - -It turns out that this linearized relationship can help us understand the underlying relationship between $x$ and $y$. If we rearrange the relationship above, we find: - -$$\log{(y)} = \theta_0 + \theta_1 x$$ -$$y = e^{\theta_0 + \theta_1 x}$$ -$$y = (e^{\theta_0})e^{\theta_1 x}$$ -$$y_i = C e^{k x}$$ - -For some constants $C$ and $k$. - -$y$ is an *exponential* function of $x$. Applying an exponential fit to the untransformed variables corroborates this finding. - -```{python} -#| code-fold: true -#| vscode: {languageId: python} -plt.figure(dpi=120, figsize=(4, 3)) +It turns out that this linearized relationship can help us understand the underlying relationship between $x$ and $y$. If we rearrange the relationship above, we find: + +$$\log{(y)} = \theta_0 + \theta_1 x$$ +$$y = e^{\theta_0 + \theta_1 x}$$ +$$y = (e^{\theta_0})e^{\theta_1 x}$$ +$$y_i = C e^{k x}$$ + +For some constants $C$ and $k$. + +$y$ is an *exponential* function of $x$. Applying an exponential fit to the untransformed variables corroborates this finding. + +```{python} +#| code-fold: true +plt.figure(dpi=120, figsize=(4, 3)) + +plt.scatter(x, y) +plt.plot(x, np.exp(theta_0) * np.exp(theta_1 * x), "tab:red") +plt.xlabel("Length") +plt.ylabel("Age"); +``` + +You may wonder: why did we choose to apply a log transformation specifically? Why not some other function to linearize the data? + +Practically, many other mathematical operations that modify the relative scales of `"Age"` and `"Length"` could have worked here. + +## Multiple Linear Regression + +Multiple linear regression is an extension of simple linear regression that adds additional features to the model. The multiple linear regression model takes the form: -plt.scatter(x, y) -plt.plot(x, np.exp(theta_0) * np.exp(theta_1 * x), "tab:red") -plt.xlabel("Length") -plt.ylabel("Age"); -``` +$$\hat{y} = \theta_0\:+\:\theta_1x_{1}\:+\:\theta_2 x_{2}\:+\:...\:+\:\theta_p x_{p}$$ + +Our predicted value of $y$, $\hat{y}$, is a linear combination of the single **observations** (features), $x_i$, and the parameters, $\theta_i$. + +We'll dive deeper into Multiple Linear Regression in the next lecture. -You may wonder: why did we choose to apply a log transformation specifically? Why not some other function to linearize the data? +## Bonus: Calculating Constant Model MSE Using an Algebraic Trick -Practically, many other mathematical operations that modify the relative scales of `"Age"` and `"Length"` could have worked here. +Earlier, we calculated the constant model MSE using calculus. It turns out that there is a much more elegant way of performing this same minimization algebraically, without using calculus at all. -## Multiple Linear Regression +In this calculation, we use the fact that the **sum of deviations from the mean is $0$** or that $\sum_{i=1}^{n} (y_i - \bar{y}) = 0$. -Multiple linear regression is an extension of simple linear regression that adds additional features to the model. The multiple linear regression model takes the form: - -$$\hat{y} = \theta_0\:+\:\theta_1x_{1}\:+\:\theta_2 x_{2}\:+\:...\:+\:\theta_p x_{p}$$ - -Our predicted value of $y$, $\hat{y}$, is a linear combination of the single **observations** (features), $x_i$, and the parameters, $\theta_i$. - -We'll dive deeper into Multiple Linear Regression in the next lecture. 
- -## Bonus: Calculating Constant Model MSE Using an Algebraic Trick - -Earlier, we calculated the constant model MSE using calculus. It turns out that there is a much more elegant way of performing this same minimization algebraically, without using calculus at all. - -In this calculation, we use the fact that the **sum of deviations from the mean is $0$** or that $\sum_{i=1}^{n} (y_i - \bar{y}) = 0$. - -Let's quickly walk through the proof for this: -$$ -\begin{align} -\sum_{i=1}^{n} (y_i - \bar{y}) &= \sum_{i=1}^{n} y_i - \sum_{i=1}^{n} \bar{y} \\ - &= \sum_{i=1}^{n} y_i - n\bar{y} \\ - &= \sum_{i=1}^{n} y_i - n\frac{1}{n}\sum_{i=1}^{n}y_i \\ - &= \sum_{i=1}^{n} y_i - \sum_{i=1}^{n}y_i \\ - & = 0 -\end{align} -$$ - -In our calculations, we'll also be using the definition of the variance as a sample. As a refresher: - -$$\sigma_y^2 = \frac{1}{n}\sum_{i=1}^{n} (y_i - \bar{y})^2$$ +Let's quickly walk through the proof for this: +$$ +\begin{align} +\sum_{i=1}^{n} (y_i - \bar{y}) &= \sum_{i=1}^{n} y_i - \sum_{i=1}^{n} \bar{y} \\ + &= \sum_{i=1}^{n} y_i - n\bar{y} \\ + &= \sum_{i=1}^{n} y_i - n\frac{1}{n}\sum_{i=1}^{n}y_i \\ + &= \sum_{i=1}^{n} y_i - \sum_{i=1}^{n}y_i \\ + & = 0 +\end{align} +$$ + +In our calculations, we'll also be using the definition of the variance as a sample. As a refresher: + +$$\sigma_y^2 = \frac{1}{n}\sum_{i=1}^{n} (y_i - \bar{y})^2$$ + +Getting into our calculation for MSE minimization: + +$$ +\begin{align} +R(\theta) &= {\frac{1}{n}}\sum^{n}_{i=1} (y_i - \theta)^2 +\\ &= \frac{1}{n}\sum^{n}_{i=1} [(y_i - \bar{y}) + (\bar{y} - \theta)]^2\quad \quad \text{using trick that a-b can be written as (a-c) + (c-b) } \\ +&\quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \space \space \text{where a, b, and c are any numbers} +\\ &= \frac{1}{n}\sum^{n}_{i=1} [(y_i - \bar{y})^2 + 2(y_i - \bar{y})(\bar{y} - \theta) + (\bar{y} - \theta)^2] +\\ &= \frac{1}{n}[\sum^{n}_{i=1}(y_i - \bar{y})^2 + 2(\bar{y} - \theta)\sum^{n}_{i=1}(y_i - \bar{y}) + n(\bar{y} - \theta)^2] \quad \quad \text{distribute sum to individual terms} +\\ &= \frac{1}{n}\sum^{n}_{i=1}(y_i - \bar{y})^2 + \frac{2}{n}(\bar{y} - \theta)\cdot0 + (\bar{y} - \theta)^2 \quad \quad \text{sum of deviations from mean is 0} +\\ &= \sigma_y^2 + (\bar{y} - \theta)^2 +\end{align} +$$ -Getting into our calculation for MSE minimization: +Since variance can't be negative, we know that our first term, $\sigma_y^2$ is greater than or equal to $0$. Also note, that **the first term doesn't involve $\theta$ at all**, meaning changing our model won't change this value. For the purposes of determining $\hat{\theta}#, we can then essentially ignore this term. 
-$$ -\begin{align} -R(\theta) &= {\frac{1}{n}}\sum^{n}_{i=1} (y_i - \theta)^2 -\\ &= \frac{1}{n}\sum^{n}_{i=1} [(y_i - \bar{y}) + (\bar{y} - \theta)]^2\quad \quad \text{using trick that a-b can be written as (a-c) + (c-b) } \\ -&\quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \quad \space \space \text{where a, b, and c are any numbers} -\\ &= \frac{1}{n}\sum^{n}_{i=1} [(y_i - \bar{y})^2 + 2(y_i - \bar{y})(\bar{y} - \theta) + (\bar{y} - \theta)^2] -\\ &= \frac{1}{n}[\sum^{n}_{i=1}(y_i - \bar{y})^2 + 2(\bar{y} - \theta)\sum^{n}_{i=1}(y_i - \bar{y}) + n(\bar{y} - \theta)^2] \quad \quad \text{distribute sum to individual terms} -\\ &= \frac{1}{n}\sum^{n}_{i=1}(y_i - \bar{y})^2 + \frac{2}{n}(\bar{y} - \theta)\cdot0 + (\bar{y} - \theta)^2 \quad \quad \text{sum of deviations from mean is 0} -\\ &= \sigma_y^2 + (\bar{y} - \theta)^2 -\end{align} -$$ - -Since variance can't be negative, we know that our first term, $\sigma_y^2$ is greater than or equal to $0$. Also note, that **the first term doesn't involve $\theta$ at all**, meaning changing our model won't change this value. For the purposes of determining $\hat{\theta}#, we can then essentially ignore this term. - -Looking at the second term, $(\bar{y} - \theta)^2$, since it is squared, we know it must be greater than or equal to $0$. As this term does involve $\theta$, picking the value of $\theta$ that minimizes this term will allow us to minimize our average loss. For the second term to equal $0$, $\theta = \bar{y}$, or in other words, $\hat{\theta} = \bar{y} = mean(y)$. - -##### Note - -In the derivation above, we decompose the expected loss, $R(\theta)$, into two key components: the variance of the data, $\sigma_y^2$, and the square of the bias, $(\bar{y} - \theta)^2$. This decomposition is insightful for understanding the behavior of estimators in statistical models. - -- **Variance, $\sigma_y^2$**: This term represents the spread of the data points around their mean, $\bar{y}$, and is a measure of the data's inherent variability. Importantly, it does not depend on the choice of $\theta$, meaning it's a fixed property of the data. Variance serves as an indicator of the data's dispersion and is crucial in understanding the dataset's structure, but it remains constant regardless of how we adjust our model parameter $\theta$. - -- **Bias Squared, $(\bar{y} - \theta)^2$**: This term captures the bias of the estimator, defined as the square of the difference between the mean of the data points, $\bar{y}$, and the parameter $\theta$. The bias quantifies the systematic error introduced when estimating $\theta$. Minimizing this term is essential for improving the accuracy of the estimator. When $\theta = \bar{y}$, the bias is $0$, indicating that the estimator is unbiased for the parameter it estimates. This highlights a critical principle in statistical estimation: choosing $\theta$ to be the sample mean, $\bar{y}$, minimizes the average loss, rendering the estimator both efficient and unbiased for the population mean. +Looking at the second term, $(\bar{y} - \theta)^2$, since it is squared, we know it must be greater than or equal to $0$. As this term does involve $\theta$, picking the value of $\theta$ that minimizes this term will allow us to minimize our average loss. For the second term to equal $0$, $\theta = \bar{y}$, or in other words, $\hat{\theta} = \bar{y} = mean(y)$. 
+ +##### Note + +In the derivation above, we decompose the expected loss, $R(\theta)$, into two key components: the variance of the data, $\sigma_y^2$, and the square of the bias, $(\bar{y} - \theta)^2$. This decomposition is insightful for understanding the behavior of estimators in statistical models. + +- **Variance, $\sigma_y^2$**: This term represents the spread of the data points around their mean, $\bar{y}$, and is a measure of the data's inherent variability. Importantly, it does not depend on the choice of $\theta$, meaning it's a fixed property of the data. Variance serves as an indicator of the data's dispersion and is crucial in understanding the dataset's structure, but it remains constant regardless of how we adjust our model parameter $\theta$. + +- **Bias Squared, $(\bar{y} - \theta)^2$**: This term captures the bias of the estimator, defined as the square of the difference between the mean of the data points, $\bar{y}$, and the parameter $\theta$. The bias quantifies the systematic error introduced when estimating $\theta$. Minimizing this term is essential for improving the accuracy of the estimator. When $\theta = \bar{y}$, the bias is $0$, indicating that the estimator is unbiased for the parameter it estimates. This highlights a critical principle in statistical estimation: choosing $\theta$ to be the sample mean, $\bar{y}$, minimizes the average loss, rendering the estimator both efficient and unbiased for the population mean.
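As a quick numerical sanity check of the variance-plus-squared-bias decomposition above, we can evaluate both sides on the bubble tea data from earlier. This is only a sketch: `np.var` uses the same $\frac{1}{n}$ definition of variance used in the derivation, and the candidate $\theta$ below is arbitrary.

```python
import numpy as np

y = np.array([20, 21, 22, 29, 33])   # the drinks data from earlier
theta = 26.0                          # any candidate constant prediction

mse_direct = np.mean((y - theta) ** 2)
mse_decomposed = np.var(y) + (np.mean(y) - theta) ** 2   # sigma_y^2 + (ybar - theta)^2

print(mse_direct, mse_decomposed)              # 27.0 27.0
print(np.isclose(mse_direct, mse_decomposed))  # True
```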

diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf index 807818cd..7b66b962 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-13-output-1.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf index a8d7f108..02a64577 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-14-output-1.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf index 93d961ca..85c5c531 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-15-output-1.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf index 8f68f0b2..24402808 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-4-output-1.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf index 8bb2661a..05b0bd69 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-5-output-1.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf index 0d9a275f..f9e94bad 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-7-output-2.pdf differ diff --git a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf index 42acfd6d..211eb515 100644 Binary files a/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf and b/docs/constant_model_loss_transformations/loss_transformations_files/figure-pdf/cell-8-output-1.pdf differ diff --git a/docs/eda/eda.html b/docs/eda/eda.html index 31fc8fec..c79eb732 100644 --- a/docs/eda/eda.html +++ b/docs/eda/eda.html @@ -361,7 +361,7 @@

Data Cleaning and EDA

-
+
Code
import numpy as np
@@ -426,7 +426,7 @@ 

5.1.1.1 CSV

CSVs, which stand for Comma-Separated Values, are a common tabular data format. In the past two pandas lectures, we briefly touched on the idea of file format: the way data is encoded in a file for storage. Specifically, our elections and babynames datasets were stored and loaded as CSVs:

-
+
pd.read_csv("data/elections.csv").head(5)
@@ -497,7 +497,7 @@