From e1510b8ed585a1eb33e59aeb7532afdf8dce5233 Mon Sep 17 00:00:00 2001 From: Allen Downey Date: Fri, 15 Nov 2024 18:01:27 -0500 Subject: [PATCH] Adding zipf --- examples/zipf.ipynb | 2378 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 2378 insertions(+) create mode 100644 examples/zipf.ipynb diff --git a/examples/zipf.ipynb b/examples/zipf.ipynb new file mode 100644 index 00000000..9a3d3683 --- /dev/null +++ b/examples/zipf.ipynb @@ -0,0 +1,2378 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "You can order print and ebook versions of *Think Bayes 2e* from\n", + "[Bookshop.org](https://bookshop.org/a/98697/9781492089469) and\n", + "[Amazon](https://amzn.to/334eqGo)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# What's a chartist?\n", + "\n", + "Recently I heard the word \"chartist\" for the first time in my life (that I recall).\n", + "And then later the same day, I heard it again.\n", + "So that raises two questions:\n", + "\n", + "* What are the chances of going 57 years without hearing a word, and then hearing it twice in one day?\n", + "\n", + "* Also, what's a chartist?\n", + "\n", + "To answer the second question first, it's someone who supported chartism, which was \"a working-class movement for political reform in the United Kingdom that erupted from 1838 to 1857\", quoth [Wikipedia](https://en.wikipedia.org/wiki/Chartism). The name comes from the People's Charter of 1838, which called for voting rights for unpropertied men.\n", + "\n", + "To answer the first question, we'll do some Bayesian statistics.\n", + "My solution is based on a model that's not very realistic, so we should not take the result too seriously.\n", + "But it demonstrates some interesting methods, I think -- and as you'll see, there is a connection to Zipf's law, [which I wrote about last week](https://www.allendowney.com/blog/2024/11/10/zipfs-law/)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 251, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 251;\n", + " var nbb_unformatted_code = \"%load_ext nb_black\";\n", + " var nbb_formatted_code = \"%load_ext nb_black\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "%load_ext nb_black" + ] + }, + { + "cell_type": "code", + "execution_count": 252, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 252;\n", + " var nbb_unformatted_code = \"try:\\n import empiricaldist\\nexcept ImportError:\\n !pip install empiricaldist\";\n", + " var nbb_formatted_code = \"try:\\n import empiricaldist\\nexcept ImportError:\\n !pip install empiricaldist\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "try:\n", + " import empiricaldist\n", + "except ImportError:\n", + " !pip install empiricaldist" + ] + }, + { + "cell_type": "code", + "execution_count": 253, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " 
setTimeout(function() {\n", + " var nbb_cell_id = 253;\n", + " var nbb_unformatted_code = \"# download thinkdsp.py\\n\\nfrom os.path import basename, exists\\n\\ndef download(url):\\n filename = basename(url)\\n if not exists(filename):\\n from urllib.request import urlretrieve\\n local, _ = urlretrieve(url, filename)\\n print('Downloaded ' + local)\\n \\ndownload(\\\"https://github.com/AllenDowney/ThinkBayes2/raw/master/soln/utils.py\\\")\";\n", + " var nbb_formatted_code = \"# download thinkdsp.py\\n\\nfrom os.path import basename, exists\\n\\n\\ndef download(url):\\n filename = basename(url)\\n if not exists(filename):\\n from urllib.request import urlretrieve\\n\\n local, _ = urlretrieve(url, filename)\\n print(\\\"Downloaded \\\" + local)\\n\\n\\ndownload(\\\"https://github.com/AllenDowney/ThinkBayes2/raw/master/soln/utils.py\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# download thinkdsp.py\n", + "\n", + "from os.path import basename, exists\n", + "\n", + "\n", + "def download(url):\n", + " filename = basename(url)\n", + " if not exists(filename):\n", + " from urllib.request import urlretrieve\n", + "\n", + " local, _ = urlretrieve(url, filename)\n", + " print(\"Downloaded \" + local)\n", + "\n", + "\n", + "download(\"https://github.com/AllenDowney/ThinkBayes2/raw/master/soln/utils.py\")" + ] + }, + { + "cell_type": "code", + "execution_count": 254, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 254;\n", + " var nbb_unformatted_code = 
\"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\n\\nfrom empiricaldist import Pmf\\nfrom utils import decorate\\n\\nplt.rcParams['figure.dpi'] = 75\\nplt.rcParams['figure.figsize'] = [6, 3.5]\";\n", + " var nbb_formatted_code = \"import numpy as np\\nimport pandas as pd\\nimport matplotlib.pyplot as plt\\n\\nfrom empiricaldist import Pmf\\nfrom utils import decorate\\n\\nplt.rcParams[\\\"figure.dpi\\\"] = 75\\nplt.rcParams[\\\"figure.figsize\\\"] = [6, 3.5]\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "\n", + "from empiricaldist import Pmf\n", + "from utils import decorate\n", + "\n", + "plt.rcParams[\"figure.dpi\"] = 75\n", + "plt.rcParams[\"figure.figsize\"] = [6, 3.5]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Word Frequencies\n", + "\n", + "If you don't hear a word for more than 50 years, that suggests it is not a common word.\n", + "We can use Bayes's theorem to quantify this intuition.\n", + "First we'll compute the posterior distribution of the word's frequency, then the posterior predictive distribution of hearing it again within a day.\n", + "\n", + "Because we have only one piece of data -- the time until first appearance -- we'll need a good prior distribution.\n", + "Which means we'll need a large, good quality sample of English text.\n", + "For that, I'll use a free sample of the COCA dataset from [CorpusData.org](https://www.corpusdata.org/formats.asp). 
The following cells download and read the data." + ] + }, + { + "cell_type": "code", + "execution_count": 255, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 255;\n", + " var nbb_unformatted_code = \"download('https://www.corpusdata.org/coca/samples/coca-samples-text.zip')\";\n", + " var nbb_formatted_code = \"download(\\\"https://www.corpusdata.org/coca/samples/coca-samples-text.zip\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "download(\"https://www.corpusdata.org/coca/samples/coca-samples-text.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 256, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 256;\n", + " var nbb_unformatted_code = \"import zipfile\\n\\ndef generate_lines(zip_path='coca-samples-text.zip'):\\n with zipfile.ZipFile(zip_path, 'r') as zip_file:\\n file_list = zip_file.namelist()\\n for file_name in file_list:\\n with zip_file.open(file_name) as file:\\n lines = file.readlines()\\n for line in lines:\\n yield(line.decode('utf-8'))\";\n", + " var nbb_formatted_code = \"import zipfile\\n\\n\\ndef generate_lines(zip_path=\\\"coca-samples-text.zip\\\"):\\n with zipfile.ZipFile(zip_path, \\\"r\\\") as zip_file:\\n file_list = zip_file.namelist()\\n for file_name in file_list:\\n with zip_file.open(file_name) as file:\\n lines = file.readlines()\\n for line in lines:\\n yield (line.decode(\\\"utf-8\\\"))\";\n", + " var nbb_cells = 
Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import zipfile\n", + "\n", + "\n", + "def generate_lines(zip_path=\"coca-samples-text.zip\"):\n", + " with zipfile.ZipFile(zip_path, \"r\") as zip_file:\n", + " file_list = zip_file.namelist()\n", + " for file_name in file_list:\n", + " with zip_file.open(file_name) as file:\n", + " lines = file.readlines()\n", + " for line in lines:\n", + " yield (line.decode(\"utf-8\"))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We'll use a `Counter` to count the number of times each word appears." + ] + }, + { + "cell_type": "code", + "execution_count": 257, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 257;\n", + " var nbb_unformatted_code = \"import re\\nfrom collections import Counter\\n\\ncounter = Counter()\\n\\npattern = r\\\"[ /\\\\n]+|--\\\"\\n\\nfor line in generate_lines():\\n words = re.split(pattern, line)[1:]\\n counter.update(word.lower() for word in words if word)\";\n", + " var nbb_formatted_code = \"import re\\nfrom collections import Counter\\n\\ncounter = Counter()\\n\\npattern = r\\\"[ /\\\\n]+|--\\\"\\n\\nfor line in generate_lines():\\n words = re.split(pattern, line)[1:]\\n counter.update(word.lower() for word in words if word)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " 
nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import re\n", + "from collections import Counter\n", + "\n", + "counter = Counter()\n", + "\n", + "pattern = r\"[ /\\n]+|--\"\n", + "\n", + "for line in generate_lines():\n", + " words = re.split(pattern, line)[1:]\n", + " counter.update(word.lower() for word in words if word)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The dataset includes more than 188,000 unique strings, but not all of them are what we would consider words." + ] + }, + { + "cell_type": "code", + "execution_count": 258, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(188086, 11503819)" + ] + }, + "execution_count": 258, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 258;\n", + " var nbb_unformatted_code = \"num_words = counter.total()\\nlen(counter), num_words\";\n", + " var nbb_formatted_code = \"num_words = counter.total()\\nlen(counter), num_words\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "num_words = counter.total()\n", + "len(counter), num_words" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To narrow it down, I'll remove anything that starts or ends with a non-alphabetical character -- so hyphens and apostrophes are allowed in the middle of a 
word." + ] + }, + { + "cell_type": "code", + "execution_count": 259, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 259;\n", + " var nbb_unformatted_code = \"for s in list(counter.keys()):\\n if not s[0].isalpha() or not s[-1].isalpha():\\n del counter[s]\";\n", + " var nbb_formatted_code = \"for s in list(counter.keys()):\\n if not s[0].isalpha() or not s[-1].isalpha():\\n del counter[s]\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "for s in list(counter.keys()):\n", + " if not s[0].isalpha() or not s[-1].isalpha():\n", + " del counter[s]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This filter reduces the number of unique words to about 151,000." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 260, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(151414, 8889694)" + ] + }, + "execution_count": 260, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 260;\n", + " var nbb_unformatted_code = \"num_words = counter.total()\\nlen(counter), num_words\";\n", + " var nbb_formatted_code = \"num_words = counter.total()\\nlen(counter), num_words\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "num_words = counter.total()\n", + "len(counter), num_words" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The most common words are what you would expect, with the exception of \"n't\", which is there because the COCA corpus treats it as a separate word." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 261, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('the', 461991),\n", + " ('to', 237929),\n", + " ('and', 231459),\n", + " ('of', 217363),\n", + " ('a', 203302),\n", + " ('in', 153323),\n", + " ('i', 137931),\n", + " ('that', 123818),\n", + " ('you', 109635),\n", + " ('it', 103712),\n", + " ('is', 93996),\n", + " ('for', 78755),\n", + " ('on', 64869),\n", + " ('was', 64388),\n", + " ('with', 59724),\n", + " ('he', 57684),\n", + " ('this', 51879),\n", + " ('as', 51202),\n", + " (\"n't\", 49291),\n", + " ('we', 47694)]" + ] + }, + "execution_count": 261, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 261;\n", + " var nbb_unformatted_code = \"counter.most_common(20)\";\n", + " var nbb_formatted_code = \"counter.most_common(20)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "counter.most_common(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There are about 72,000 words that only appear once in the corpus, technically known as [hapax legomena](https://en.wikipedia.org/wiki/Hapax_legomenon)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 322, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(72159, 0.811715228893143)" + ] + }, + "execution_count": 322, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 322;\n", + " var nbb_unformatted_code = \"singletons = [word for (word, freq) in counter.items() if freq == 1]\\nlen(singletons), len(singletons) / counter.total() * 100\";\n", + " var nbb_formatted_code = \"singletons = [word for (word, freq) in counter.items() if freq == 1]\\nlen(singletons), len(singletons) / counter.total() * 100\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "singletons = [word for (word, freq) in counter.items() if freq == 1]\n", + "len(singletons), len(singletons) / counter.total() * 100" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's a random selection of them. Many are proper names, typos, or other non-words, but some are legitimate but rare words." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 263, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['perfact', 'feidhauses', 'laven', 'osmany', 'nnessee', 'baccardi',\n", + " 'eventuate', 'pagando', 'capsulate', 'r-miami', 'soyrizo',\n", + " 'narcotic-analgesic', 'phanar', 'midwater', 'chalker', 'fittv',\n", + " 'diplomatie', 'queso-broccoli', 'session-and', 'caricaturistes',\n", + " 'reverand', 'mesdames', 'flender', 'synchronistic', 'boom-era',\n", + " 'litvak', 'skowhegan', 'wailers', 'ambroeus', 'treximet', 'jonell',\n", + " 'soundwriters', 'pre-oiled', 'brimless', 'meta-billboard',\n", + " 'leather-strapped', 'ludvigsen', 'half-indian', 'bandmembers',\n", + " 'pinky-ness', 'pro-marriage-equality', 'bbed', 'sgarlatti',\n", + " 'flash-free', 'satelitte', 'goheen', 'med-school', 'lune',\n", + " 'remuddled', 'work-rule', 'seawolf', 'instuments', 'gaudiest',\n", + " \"they'lltell\", 'bahrainis', 'deacetylation', 'birth-and-being',\n", + " 'captionrodeo', 'cropper', 'dry-mounting', 'marijuana-related',\n", + " 'sidelining', 'forexpros', 'aisa', 'naxos', 'g6pd', 'extirpation',\n", + " 'colonising', 'faux-relationship', 'peditrician', 'fanboyism',\n", + " 'fartlek', 'deacetylation', 'shityou', 'undeservedly', 'budman',\n", + " 'upheaving', 'al-fatwa', 'orange-toothed', 'lehy',\n", + " 'psychodynamically', 'toys-toys', 'unreason', 'mattatuck',\n", + " 'decalogue', 'ritcheychrtien', 'silents', 'scythes', 'bothsighing',\n", + " 'dabbler', 'cyclicals', 'mussed', 'odour', 'skandalakis',\n", + " 'rayograph', 'boastfulness', 'electro-optical', 'loogierules',\n", + " 'wows', 'kornberg'], dtype='" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "np.random.choice(singletons, 100)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's see what the distribution of word frequencies looks like." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Zipf's Law\n", + "\n", + "One way to visualize the distribution is a Zipf plot, which shows the ranks on the x-axis and the frequencies on the y-axis." + ] + }, + { + "cell_type": "code", + "execution_count": 331, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 331;\n", + " var nbb_unformatted_code = \"freqs = sorted(counter.values(), reverse=True)\";\n", + " var nbb_formatted_code = \"freqs = sorted(counter.values(), reverse=True)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "freqs = sorted(counter.values(), reverse=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 332, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 332;\n", + " var nbb_unformatted_code = \"n = len(freqs)\\nranks = range(1, n + 1)\";\n", + " var nbb_formatted_code = \"n = len(freqs)\\nranks = range(1, n + 1)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "n = len(freqs)\n", + "ranks = 
range(1, n + 1)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's what it looks like on a log-log scale." + ] + }, + { + "cell_type": "code", + "execution_count": 333, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAboAAAD+CAYAAABMScBLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAAwJUlEQVR4nO3deVhU9f4H8PcwM+ybbCIMKAKKg5r7Bm6puaRFJJaagYpLjqXdbnu/8ubV9rJ7Q8u9AstLApled8V9QRM3VDRlGRBBQQGRbWZ+f3idJBUHmJkzM7xfzzNPnDNnznz4PunHz/d8F5FGo9GAiIjIQlkJHQAREZEhMdEREZFFY6IjIiKLxkRHREQWjYmOiIgsGhMdERFZNCY6IoElJCSgX79+Ol9/4cIF9OzZE05OTnjttdca/H0xMTGYO3dugz9HZK6Y6IgMKCEhAY6Ojve9xGIxhgwZAgCYOHEiDhw4oPM9P/nkE3Tu3BllZWX44osvDBU6AGDevHmIiIgw6HcQGRoTHZEBTZw4EeXl5XVemzZtgo2NDd57771G3fPy5cvo1KmTniMlslxMdERGlJeXh6ioKCxcuBCDBw8GAKxevRpdunTRXtOmTRssWLAA3bp1g7OzM4YPH478/HwAQK9evZCamoo333wTjo6O2L59+33fMW/ePIwePRpTp06Fs7MzgoODkZyc/NCYjh49irCwMLi6ukIul+Onn34CAKSkpGDhwoXYsGGDthIlMkdMdERGUl1djbFjx2LYsGGPfEa2fPlyrFmzBgUFBfD29sYLL7wAADhy5Aj69++PTz75BOXl5Rg6dOgDP79582b06tULxcXF+PLLLzF+/Hj88ccf911348YNjBgxAs8//zyKioqwZMkSTJs2Dfv370dERATeeecdjB49WluNEpkjJjoiI5k9ezaqqqqwdOnSR1770ksvISQkBPb29vj000+xa9cuKJVKnb+rXbt2mDFjBiQSCcaMGYPBgwdrK7V7bdy4EZ6ennj55ZchlUoxcOBATJgwAd9//32DfjciU8ZER2QES5cuRVJSEpKSkmBnZ/fI61u3bq39uWXLlrCxsUFeXp7O33fv5+8eP+jzSqUSbdq0qXOubdu2DUqqRKaOiY7IwA4dOoRXX30Va9euvS+pPEx2drb258LCQlRVVcHX11fn77z38wCQk5PzwM/LZDJkZWXVOZeVlQWZTAYAsLLiXxFk/vh/MZEBFRQU4Nlnn8WHH36onU6gi++++w7nz5/H7du38eabb2LAgAHa5KOLzMxMLFu2DLW1tdi4cSN27tyJ55577r7rRo0ahcLCQixevBi1tbXYu3cvEhIS8OKLLwK4U01mZ2ejtrZW5+8mMjVMdEQGtHTpUuTn5+ODDz544Hy6h5kyZQrGjx+Pli1bIi8vDwkJCQ363hEjRuDQoUNwc3PDnDlzEB8fj+Dg4Puua9GiBTZt2oT4+Hi4u7tj+vTpWLJkCcLDwwEAUVFRcHZ2hqenJ1xdXRsUA5GpEHHjVSLT0qZNGyxatKjRE7XnzZuH9PR0pKSk6DUuInPFio6IiCwaEx0REVk0dl0SEZFFY0VHREQWjYmOiIgsmkToAJrK2dm5QfOLiIjI8iiVSpSWlj7wPbNNdCkpKUhJSYGLiwsyMjKEDoeIiAQkl8sf+p7ZD0aRy+VMdEREzVx9ucDsK7qysjKhQyEiIhPGio6IiMweKzoiImq2WNEREZHZY0X3qH
sdz4NUbIXhoS0hEXNqIRGRJWn2FZ1Go8GT/9qHjCulaOViixf6tMbzPf3g7mijxyiJiMiQ6ssFzb58EYlESJrVD59HPQZ3R2t8tuU8+n68E39PPIHTeTeFDo+IiJrIbCu6u12XO3bsQG5url7uqdFo8HtOCVYfyMamU1dQq9age+sWiOnXBiM6ekPKbk0iIpNUX0VntonuLkMNRrlaWomEQ9lYcyQH18qr0dLZBhN7t8b4Xv7wdGK3JhGRKWGia4KqWhX+e+oKVu/PwgnlTViLrfBk51aI6dcGj/m5Gux7iYhIdxY56tJYbCRiPNNVhme6ynA8pwTfH8jChpP5SD6ehy5+rojp1wajOrWCtYTdmkREpshsKzpDPKPTVWFZJdYczkHC4RwUlVXB08kGE3r5Y2Jvf3g52xo1FiIiYtelwVTXqrHp9BV8fyALv+fcgFQswsiOrRAT1gZd/VwhEokEiYuIqLlh16WBWEus8HQXXzzdxRcnlTew+kAWNpy4gvUn8hHi7QQvZ1vYSKz+9xLD+u7P0jvHf75nBTtrCTwcreHpZAMvJ1u4O1jDyoqJkoioqVjR6dm18ir8dDgHWzIKUFGlQlWtGlW1d/+rRnWtWqf7iK1E8HC0hpeTLbycbODlbAMfFzv4u9vD3+3Oy83BmlUjERHYdWlS1GoNqlV/Jr27SbCiSoWi8koUllahsKwKhWWVKCr738+lVSgqq0K1qm6SdLSRwM/NHkFejpgzJBhBXo4C/VZERMKyyK5Lc929wMpKBFsrMWyl4ge86/LQz2k0GhSVVyHnegVyiv985RZXYFtGAbaeKcBbI0MQ3bcNuzyJiO7Bis4CZF27hb/9Jx2/59xAWJA7Phv7GHxc7YQOi4jIaLjWpYVr4+GA/8zoi9eHt8fhS8UYvmgPUo7nwcz/DUNEpBdMdBZCIraCYnAQUhRhaOVii7lr0zH63/vw9fYLOJN/k0mPiJotdl1aoMoaFZbuuYT1J/JxsbAcAODraodh8pZ4qosP5/gRkcXhqMtm7PK1W9iecRXbzl7F0axiqDVAa3d7RHTxRWQ3X7R2dxA6RCKiJjOrRJeamop3330XnTp1wpAhQxAVFVXv9Ux0uissrcT6E3fW6TyTXwqRCBjWoSVmDGyL7q3dhA6PiKjRBB2MMmfOHMhkMkgkdWcypKamIjQ0FEFBQYiNjYVKpQJwZyNUR0dHVFRUoG3btoYOr1nxcrZFbP+22PhKf2x9dQBe7NMaey4U4dklBzF2yQH8eDALZ6+UQqU2qX/7EBE1icErun379iEoKAgymQy1tbUAALVajXbt2mH9+vWQy+UYN24cnnzySURHR0OtVsPKygq3bt3CuHHjsHHjxnrvz4quaYpvVeOHg1n44WA2im9VAwCcbCUYHuqN6QPaol1LJ4EjJCJ6NEEnjIeHh993Li0tDT4+PpDL5QCAqVOnIi4uDtHR0bCyulNkOjg4PHCkYFxcHOLi4rTHJSUlBoq8eXBzsMbcoe0we3AQzhWU4WhWMQ5euo7k43n45ZgSj4d44f9GyxHgwWd5RGSeBFkZRalUws/PT3vs7++v3WonKSkJmzdvRnl5OaKjo+/7rEKhgEKh0B7fTZbUNBKxFTr6uqCjrwtiwgKgLKnAyn1ZWHMkG88uOYBVMT250SwRmSVBEl19vaWRkZGIjIx85D3MdQkwcyFrYY/3x8gR2c0XMauOYPyyQ/j2he4Y0M5T6NCIiBpEkAnjfn5+dTZLzcnJgUwmEyIUeoSOvi74ZWY/eDjaYOr3adh7oUjokIiIGkSQRNejRw8olUrtg8MVK1boVMXdKyIiAqtXr4aTEwdLGFobDwckzuwLbxdbzPjxGI7n8LkoEZkPgye6GTNmQCaTQaVSQSaTQaFQQCwWY/ny5Rg7diwCAwPh6OiISZMmNei+KSkpiImJYdelkbR0tsWPU3rD3lqCmFVpeHVtOt5LOYWtZwqg5nQEIjJhJjdhvKE4vcC4zl4pxd
yf03Hl5m1UVKtQq9YgxNsJY7vL0CvADZ18Xbi8GBEZnVmtjKKru4NRduzYUed5HxlPZY0KPx3JwdI9l3DlZiUAYGA7Tyx6rgtaOFgLHB0RNScWmejuYkUnPLVagz+KyvFrej7iUi/C29kWvQPcEODhiMEhnujo48LNYInIoJjoyGj2ZBbhk83nkFtcgdLKOyvheDnZICzIA55ONmhhbw03Byl8XO0QFujBBEhEeiHoyiiGwnl0pmlAO0/tXLuc6xXYfvYqdp4rxNYzBbhVrapzbY/WLfDykGB0aOUEJxsp7KzFQoRMRBaOFR0ZTVWtCjcqalB8qxp7Movw9Y4LqLgn+bk5WKN/sAe+HNcFYlZ6RNQArOjIJNhIxGjpLEZLZ1t0aOWM8b39sfXMVVwtrURpZQ0y8kvxa3o+Hg/xwtNdfIUOl4gsBCs6MhnVtWoM/jwVGo0Gb44MwYiO3rCRsDuTiB5N0P3oiHRlLbHCP5/piGqVGnN+Tkffj3bivZRTOHK5WOjQiMiMsaIjk1Ndq8bWjAIkHMrB4cvXYSUS4b0nO6CdtxO6+beArZRVHhHVxWd0ZFasJVYY3dkHozv7oLC0EhOWH8a83+78D+xiJ0VUdxmGd/RGFz9XSMXslCCi+rGiI5NXo1LjRO4N5JZUYN2xPOy7eA0A4O1si4HtPNEn0A0jO7ZipUfUjHHCOFmUrGu3cOCP60g5nof03BuoVqlhJxVjYm9/TOzTGq3d7DkRnaiZYaIji1VZo8LWjKuIP5iNI1l3Bq042kgQ6uOM8CAPBLd0hK+rPeQ+zpybR2TBLPIZHREA2ErFeOoxH4zp3AppWSU4qbyB03k3cSynBF9sy9Re183fFf+e0A2+rnYCRktEQmCiI4sgEonQK8ANvQLcAAAajQZ5N24j53oF9l28hsWpf+CNX07ghym9WdkRNTNmm+g46pLqIxKJIGthD1kLe/QL8kBhWRV+OabEq2vT8doT7dDa3UHoEInISPiMjpqFGpUaL685js1nCtDCXopPnu2Mge09ufIKkYXgyijU7EnFVljyQjckxPYGAEz/8RiGfrkbp/NuChwZERkaEx01GyKRCGFBHtj9xmC8P1oOZcltjPlmH+ZvyMDZK6Uw884NInoIJjpqdpxtpZgSHoBtrw5EeJAHVuy7jJFf70XMqjRk5JcKHR4R6ZnZDkYhaqogL0esntwL6bklSD6eh4TDOdidWYTXh7fHSwMDOemcyEKYZEWnUqkwatQoLFq0SOhQyMKJrUTo3toN/4zohE1z+qNnmxb4bMt59PloB97/9TTUanZnEpk7gye6OXPmQCaTQSKpWzympqYiNDQUQUFBiI2NhUr1507TX331FZ566ilDh0ZUR4i3M36e3hf/jOgINwdr/HAwG6P+tRff7LwAZUmF0OERUSMZPNFFRUXh6NGjdc6p1WrExsYiMTERFy9eRGlpKeLj4wEAaWlpsLW1RUhIiKFDI7qP2EqEF/q0xq+zwzBzYCBu16jw+dZMhH+yC1NXp+FaeZXQIRJRAxltHp1EIkFtbS0A4PDhw3j99dexZ88eAMCWLVsQFxeH9evXY8GCBSgsLEReXh6KioqwZs0a+Pr6au8TFxeHuLg47XFJSQmuXLlijF+BmiGNRoPTeaVYtvcS1p/Ih7XECuN6yPDek3LulkBkQkxurUulUgk/Pz/tsb+/P3JzcwEA7777LoA7XZvp6el1khwAKBQKKBQK7cooO3bsMF7g1OyIRCJ0krngX+O7IiasDT7fch7xh3KQnnsDK2N6wsvJVugQiegRBBmMoksROWjQIMydO/eh70dERGD16tVwcnLSY2RED9fNvwXip/bGq0Pb4dyVMgz9Yjd+PpLD+XdEJk6QROfn56et4AAgJycHMpmsQfdISUlBTEwM17oko7KyEmHO0GCsmdYHHo42eCvpFIYv2oMfDmahskb16BsQkdEJkuh69OgBpVKp7U9dsWIFIiMjhQiFqFF6BbghZX
YYXn48CMW3qvH+r2cQ9e1BbD1TIHRoRPQXBk90M2bMgEwmg0qlgkwmg0KhgFgsxvLlyzF27FgEBgbC0dERkyZNatB92XVJQnO2leK1J9rjyDtD8cmznXAq7yam/3gMX2+/IHRoRHQPs9294N7BKPd2gxIJ5Xp5FUZ8vRdFZVV4a2QIpoYHQCo2yTUZiCxOfaMuzTbR3cVtesiUXLl5GxOWHcbla7fQp60bXh8egu6tWwgdFpHF4zY9REbSysUO2/82EK8MCcbhy8V4dskBvLD8MLbw2R2RYMy2omPXJZm63OIKrNx/GSnH81BSUYNnuvpiQm9/dPNvATEXjCbSK3ZdEgmooroWM348hr0XrgEAOvo6Y9agIAxq7wl7a24gQqQPTe663L17t14DImpO7K0l+GFKL6T+fRBeH94eF66WY1bC7+i9cAe+P5CFq6WVQodIZNF0quiGDx+OnJwcTJ48GTExMfDy8jJGbPVi1yWZq5sVNUjNLETcrovIvFoOkQiI6OKLt0aGoKUzlxQjagy9dF1mZWVh5cqViI+PR7du3TBt2jQMHz5cr4E2BrsuyVzVqtQ4dKkYqw9kYfvZq3CwFuODMaGI7OYLCaclEDWI3p7RaTQa/Pbbb5g1axYkEglsbGzw+eefY8yYMXoLtqGY6MgSnFTewOw1x5FTXAE3B2sMbu+FSX1bo5OvCweuEOmgyYkuNzcXy5cvx5o1a9C7d29Mnz4dAwYMwKVLlzB48GBkZ2frPehHYdclWZqqWhV+Tc/HplNXsOt8EQDA0UaCIR28MKazD/q384CNhFsDET1IkxNdSEgIpk2bhujoaHh4eNR5b9GiRfXuMmBorOjIEmVeLcP+i9dw5HIxdp4rRFWtGlKxCCM6tkJUdxn6Brpz1RWie3B6AZEZK6+qxfaMq9idWYQNJ/NRo9LAxU6Klx8PwpSwAFixa5Oo6dMLBg4ciJKSEu1xcXExBg8erJ/oiKhejjYSRHT1xVfPdcHRd4fh86jH4Odmh39uPIuIxfux7pgSpZU1QodJZLJ0mq168+ZNtGjx53p9bm5udRIfERmHi70UY7vL8HQXH3x/IAuLU//Aa4knYJ1shSfkLTE1PABd/bm2JtG9dEp0IpEIhYWF2vlzBQVct49ISFKxFWL7t8WLfdtg/8Vr+O1EPn47mY8NJ69gUHtPTOzdGkM7eEEkYrcmkU6J7u2330afPn3w7LPPAgCSkpLw8ccfGzSwR7k76pI7jFNzZi2xwuAQLwwO8cKbI0Pw5dZM/HoiD6nnizCovSei+7bBwHaefI5HzZrOg1HOnj2LnTt3QqPRYOjQoQgJCTF0bDrhYBSiusqravHvnRfw48FsVFSr8JifK2YMaIuRHb1Z4ZHF4qhLomaovKoW8Yey8e3uP3CjogbtWzphclgbPBHqDTcHa6HDI9KrJo+6/PXXX9GuXTvY2trC2toaUqkU1tb8g0JkyhxtJJg5MBCH3xmC957sgBu3q/FW0in0WbgDCzZm4GJhudAhEhmFThVdUFAQ1q5di+7duxsjpgZhRUekmxqVGmmXi7F832XsPFcIAHi6iw9eGhSIEG9ngaMjapr6coFOg1E8PT1NMskRke6kYiv0C/JAvyAPZF4tw4q9l/HL70r8mp6PTr4uiOohw+jOPuzWJIujU0X35ptvoqysDFFRUbCxsdGe79evn94DunDhAr744gtUVVXBx8cHCxYsqPd6VnREjZdbXIG1ablI+l2J/JuVsBIBvQPcMb63P56Qt4StlGtrknlo8mCUB62CIhKJsHPnzkd++Zw5c7Bu3ToUFBSgtrZWez41NRUKhQJVVVUYNGgQvvvuO4jFdf9QRUVFITExsd77M9ERNZ1arcHBS9ex5UwBNpy8guJb1XCxk2J8L39E92uNVi52QodIVC9BR13u27cPQUFBkMlk2kSnVqvRrl07rF+/HnK5HOPGjcOTTz6J6OhoAMC2bduwZMkS9OnTB2+88Ua992eiI9
KvqloVtmVcxY8Hs3H4cjEAoHeAG2L6tcEweUvulUcmqcmJrqysDAsXLkROTg4SEhJw/vx5ZGRk4JlnntE5CIlEok10hw8fxuuvv449e/YAALZs2YK4uDisX7++zmdGjx6N5ORkSKVS7bm4uDjExcVpj0tKSnDlyhWd4yAi3Wg0GpzJL0Xy8Twk/a5ESUUNfF3tMCU8AON6yOBkK330TYiMpMnTC6ZMmQJ3d3ecPHkSAODv749//OMfjQ5IqVTCz89Pe+zv76/dU27//v145ZVX8NJLL6Fr1651khwAKBQKZGRkYOHChejVqxckEp3G0xBRA4lEInT0dcH/jZbj4NtD8NnYzrC3FmP+hgz0WbgD89afwbmCUqHDJHoknbLEpUuXkJiYiISEBACAnZ1dk1ZYqK+IDAsLQ1hY2CPvERERgYiICMjl8kbHQUS6sZWKEdXDD2O7y7Dv4jWs2p+F1QfuvHoFuGFCL3+M7OTNjWHJJOmU6KRSKWpqarTJLS8vD1ZWje+n9/Pzq7MreE5ODmQyWYPuwbUuiYxPJBKhf7An+gd7Ire4AmuO5OCnIzmYuzYdX223xwu9W2NsdxlacIoCmRCdntH98ssvWLZsGU6fPo3nnnsOycnJ+PLLLxv9jE6lUiE4OBgbNmzQDkYZOXIkJk+e3OBfgINRiIRVXatGSnoelu65hIuF5bC3FmNcDz9MDmuD1u4OQodHzYReRl1mZmZi27ZtDV7UecaMGdi4cSPy8vLg6+uLp59+GnFxcdi5cydmz56NqqoqDBw4EEuXLm3Q87a7Fd2OHTvqVIdEJAyNRoMDf1zHsr2XkHq+CCIRMLKjNyK7yhAe7ME5eWRQXNSZiIzqYmEZVuy7jHW/56G6Vg1nWwli+7fF1PAAONhwABnpX5MTXUBAwAMHn1y6dKnp0TUSKzoi03erqhbbMq5izeEcHMkqhoO1GE918UF0vzZcX5P0qsmJLi8vT/tzZWUlEhMTcfv27SZNMdAXVnREpk+j0SA1swgJh3Kw63whVGoNOvo6Y0pYAJ56zIeT0KnJDNJ12bNnT6SlpTUpsKZgRUdknpQlFUg8qsRPR3JQWFaFdi0d8fLjwRjVqRXE3AmdGqnJiS4/P1/7s1qtxrFjx/DWW2/h7Nmz+ouykVjREZmnGpUa/zmai3/tuICrpVXo5OuCV4YEY0iIF6yY8KiBmrxNT1hYGEQiETQaDSQSCdq2bYuVK1fqNUgial6kYitM/N+8uzWHc/Dl1kxM++EoHpO54ONnO6NDKz7DI/0w21GX7LoksiwV1bWIP5SNz7dmokalxohQb7wzqgP83OyFDo3MQJO7LtesWVPv+xMmTGhcZHrArksiy5J/4zbidl1E4lElrKyAib1bQzE4iBvCUr2anOhGjx6Nffv24fHHHwcA7Nq1C+Hh4fDw8IBIJBK0G5OJjsgyZV4tw4KNZ7E7swiONhLMGhyIF/u2gSPn4dEDNPkZXW1tLU6fPq1djzIvLw+xsbFYtWqV/qIkIrpHu5ZO+H5KLxzNKsZHm87h083nsXTPJcwcGIhJfVpz4jnpTKfJK39ddNnX1xfZ2dkGC0oXKSkpiImJ4aLORBauRxs3JM7oi+8mdYevqx0+3nQO/T/dhc+3nMfV0kqhwyMzoFPX5Ysvvgi1Wo0XXngBwJ/P7H744QfDRqcDdl0SNR8ajQbbzxZicepFHM+5ATupGC8NCkRs/wDYW7PCa86a/IyuqqoKixcvxt69ewEAAwcOxMyZM2FjY6PfSBuBiY6oeTqRewMLNp7FkaxieDrZ4JUhwZjYy59z8Jopva2MUlRUBE9PT70Fpg9MdETNl0ajwZYzV/HRprPIvl6BDq2csfCZjujq30Lo0MjI6ssFOj2j27NnDwIDA9GjRw8AwPHjxxEbG6u/CImIGkEkEmFER29s/9tAvD68PbKv38Iziw9g7s/HoSypEDo8MhE6Jbq///3vSE1NhZubGwCga9euOHTokEEDexQORi
Giu6RiKygGByH19UF46jEfpKTnY/DnqZi/IQO3qmqFDo8EpvOS4X5+fnWOG7JJqiFERERg9erVcHJyEjQOIjIdXk62+Nf4rtgydwD6BXpgxb7LGPR5KuIPZaNWpRY6PBKITomuZcuWOHv2rHZPulWrVsHf39+ggRERNVZ7byesntwTK2N6wMlGgvdSTmPMN/uRnntD6NBIADoNRsnJycGUKVOwb98+ODg4IDQ0FAkJCfdVeULgYBQiqk+NSo21ablYsPEsbteoMGNgW7z8eDBXWLEwTVoZRaVSITk5Gdu3b8etW7egVqvZXUhEZkMqtsILfVqjf7AH3k0+je92X8IvR5V4ZUgwnu/lBxuJWOgQycAe2XUpFosRHx8PAHBwcGCSIyKz1NrdAT9O7YUV0T3g7miND9afwZAvdmP9iXyY6SYupCOdui7ffvtt2NjYYOLEiXBwcNCe9/Hx0XtAGzduRHJyMm7fvo0nnngC0dHR9V7PrksiaiiVWoPk43n4bMs5XC2tQoi3E/7xVCh6t3UXOjRqpCZPGA8ICLj/gyIRLl269MgvnzNnDtatW4eCggLU1v45zDc1NRUKhQJVVVUYNGgQvvvuO4jFdbsQnn76afz666/13p+Jjogaq7JGhRX7LuPLbZlQqTUY1N4T85/uyD3wzFCjJ4zv3r0bAHD58uX7XrokOQCIiorC0aNH65xTq9WIjY1FYmIiLl68iNLSUm336F0fffQRpk2bptN3EBE1hq1UfGf+3d8HYUSoN1LPF2HAZ7vwxdbzUKnZnWkp6k10r776qvbnXr16NeoLwsPD4e3tXedcWloafHx8IJfLAQBTp07FunXrtO/Pnz8fAQEBGD169H33i4uLg1wu175KSkoaFRcR0V1+bvb4dlJ3rJ3eB76udvj3zosI+3gn9mQWCR0a6UG9ie7eXs2amhq9falSqawzNcHf3x+5ubkAgGXLluHnn39Gamoq5s2bd99nFQoFMjIytK8WLbimHRHpR++27tj9+mC8MiQYBaWVeHHlESgSfkdljUro0KgJ6p1ecHeC+F9/bqr6HgtOmzZNpy7LlJQUpKSkcAkwItIrsZUIfxvWDs/19MOra9Ox8dQV7LlQhK+f74LHQ1oKHR41Qr0VXXp6OqytrWFtbV3nZ6lUCmtr60Z/qZ+fn7aCA+7f2JWISGi+rnb4z4y+eO/JDiirrMWU1UcxYdkhXC+vEjo0aqAGbdPTFBKJRDvqUqVSITg4GBs2bIBcLse4ceMwcuRITJ48ucH35ahLIjK0vBu38cYvJ7D/4nUAwMJnOmFCby6DaEqavE1PU8yYMQMymQwqlQoymQwKhQJisRjLly/H2LFjERgYCEdHR0yaNKlB9+XuBURkLL6udkiI7YMvxz0GAHgn+RQ6zduC03k3BY6MdGG0is5QWNERkTGVVdZg2g9HcehSMQBgSlgA3h8jFzgqErSiMxRWdEQkBCdbKX6e3hfzIzoCAFbuv4z2721C5lX+XWSqWNERETVS/o3bGL/sELKv39nNfEpYAP5vdAe9jlIn3bCiIyIyAB9XO+x+fTDeGRUC4E5199g/tqKwrFLgyOherOiIiPSg4GYlBn62C1W1d3Yyn/90KCb1bSNsUM2IRVZ0RESmxNvFFmc/HIHxve5MO/i/X8/gia9243Y1V1URmtkmOnZdEpGpsbIS4aPITkhRhAEAMq+Wo8P7m7Et46rAkTVv7LokIjKAGpUaMauOaCeZ9w/2wIronrCWmG19YdLYdUlEZGRSsRUSYvtg2Ys9AAB7L1xDu/c24eAf1wWOrPlhoiMiMqBh8pY4N38EOstcAADjlx3C20mn6l3cnvTLbBMdn9ERkbmwlYqxfnY4FjxzZ5L5T0dyEPrBFuTduC1wZM0Dn9ERERmRsqQCAz9L1e5g/vLjQfjbsHacZN5EfEZHRGQiZC3skfnPkZg+oC0A4N87L6LXwh0oKuP2P4bCREdEZGRiKxHeGdUBW+YOAAAUlV
Wh54Lt+O1EvsCRWSYmOiIigbT3dsK5+SMwunMrAMDLPx3HpBWHUVFdK3BklsVsEx0HoxCRJbCVivHNhG5YPbkngDvTEOTvb0H29VsCR2Y5OBiFiMhElFbWYOSivdrRmB+MkePFvm0gtuJAlUfhYBQiIjPgbCvF3jcG4/mefgCAf/yWgahvD6DkVrXAkZk3JjoiIhNiZSXCx892xpppvQEAv+fcQI8F23FSeUPYwMwYEx0RkQnqF+iBc/NHoKu/K1RqDZ76Zj9+PJgldFhmiYmOiMhE2UrFSJ4Vhph+bQDc2fpnVsIxqNVmPbTC6Ewu0RUWFmLKlCno37+/0KEQEZmEeU+F4stxj8HfzR7/PVWACcsP4XwBR5zryuCJbs6cOZDJZJBIJHXOp6amIjQ0FEFBQYiNjYVKdWdzQi8vL6xcuRLu7u6GDo2IyGxEdpNhyQvd0MnXBYcuFePNdSfx46FsocMyCwZPdFFRUTh69Gidc2q1GrGxsUhMTMTFixdRWlqK+Ph4Q4dCRGTWQn1ckKIIQ6iPM07n3cSHv51ByvE8XCvn8mH1MXiiCw8Ph7e3d51zaWlp8PHxgVwuBwBMnToV69at0+l+cXFxkMvl2ldJSYneYyYiMlViKxE2vtIfb44IQY1Kg7lr0/H19gtCh2XSBHlGp1Qq4efnpz329/dHbm4uAKCqqgozZ87EiRMnoFAo7vusQqFARkaG9tWiRQujxU1EZCqmhAdgw8vh8HKyQfzhbMjf34xd5wqFDsskSR59if7VtxiLjY0Nvv3220feIyUlBSkpKVwCjIiaJbGVCB19XfD+GDn2ZBbhP0eV+PFQNvJu3MaYx3zgYicVOkSTIUii8/Pz01ZwAJCTkwOZTCZEKEREZm10Zx+M7NgKO84WYue5O68alRqTwwKEDs1kCNJ12aNHDyiVSu26ZCtWrEBkZGSD7hEREYHVq1fDycnJECESEZkNsZUIqa8Pwn9fuTMtK+FwDl6KP4bNpwsEjsw0GDzRzZgxAzKZDCqVCjKZDAqFAmKxGMuXL8fYsWMRGBgIR0dHTJo0qUH35e4FRER/crKVokMrJ4QHeaC8shbbMq5i9YHLQodlErh7ARGRBRq75AAuFpVjQLAn7KRivDGiPdwdbYQOy2AscvcCVnRERA8XHuwBsUiE1POFWHs0Fwf+uC50SIJhRUdEZMHOFZRixKK9mDGgLQa194JULEIXP1dIxGZb5zxQfblAkFGXRERkHG721gCA7/Zcwnd7LgEAPo7shOd7+QsZllGZbaLjPDoiokfzcrbFupf64Vp5Fcora/Fa4olmt2QYuy6JiJqJW1W1CP1gC0Z29MbTXXwAAC521ugbaP6L6Ftk1yUrOiKihrGViuFgLcam0wXYdM8cu+1/G4AgL8udk2y2iS4iIgIRERHahaGJiKh+YisRNs8dgPwbtwEABy9dx6LtF3Dzdo3AkRmW2SY6IiJqOD83e/i52QOANsFV1aqFDMngmOiIiJopa8mdKQbxh7Kx+3yR9ryznRTTB7SF1EKmIJhtouMzOiKipmnt7gA7qRj/PXX/mph9A93Rzd8ytkHjqEsiomasVqWG+p4ssC3jKhRrfsdP0/qY1WhMixx1SURETffXFVJspXeOa9WW89zOMjpgiYhIL+4mvlqVWXf21cGKjoiItKRWIgBA5tUyuNo/eJdydwcb+LvbGzOsJmGiIyIiLSfbO8nto03nHnqNSAQceWcoPJ3MY9sfs010HHVJRKR/HX2dsSqmJ0orHzyJfN+Fa0g8pkRpZQ0TnaFxZRQiIv0TiUQYHOL10PfLq2qReEwJtdp8nuFxMAoREelMLLrzDE9lRjPTmOiIiEhnVv8brKJiRUdERJbobkVnTtPsmOiIiEhnYivz67o0ucEoFRUVmDVrFuzt7dGuXTvMnTtX6JCIiOh//uy6NJ+SzuAV3Zw5cyCTySCR1M2pqampCA0NRVBQEGJjY6FSqQAASUlJGDNmDBYvXowDBw
6gpsay90kiIjInd7sui8qqUHCzUi+vagNvE2Twii4qKgpvv/02ZDKZ9pxarUZsbCzWr18PuVyOcePGIT4+HtHR0cjNzcXQoUMBAJ6enrh27RpatWpl6DCJiEgHd9fCnBn/u97umaIIQxc/V73d768MnujCw8PvO5eWlgYfHx/tHLipU6ciLi4O0dHR8PPzQ25uLnr27Ilr167Bw8Ojzmfj4uIQFxenPS4pKTHsL0BERFphQR6Y/3QoKqpVerunj4ut3u71III8o1MqlfDz89Me+/v7Izc3FwAQGRkJhUKB1NRU9OvXD1Jp3bXWFAoFFAqFdmWUHTt2GDV2IqLmzFYqxqS+bYQOo0EESXT1bYFnb2+PVatWPfIeXBmFiIh0IUiiu9s9eVdOTk6dZ3i64FqXRESkC0Hm0fXo0QNKpVK7G+yKFSsQGRkpRChERGThDJ7oZsyYAZlMBpVKBZlMBoVCAbFYjOXLl2Ps2LEIDAyEo6MjJk2a1KD7RkREYPXq1XBycjJQ5EREZAlEmvoemJmwewej3NsNSkREzY9cLtf2Ev6V2Sa6u+r75YiIqHmw6ETn7OysHchSVlZ2X1fmvef++v69xyUlJWjRooXe4npQLE29/mHX6Hq+IcdsD8tuD13O1fdnx9Tbo7739d0e+m6Lh8XTlOubQ3solUqUlpY++E2NBYmOjq733F/fv/e4Q4cOBo+lqdc/7BpdzzfkmO1h2e2hy7n6/uyYenvU976+20PfbfGweJpyfXNrj7+yqN0LIiIi6j331/cfdL0hY2nq9Q+7RtfzDT3WJ7ZH/bHo4/qGtIcu5+r7s6Nv+m6P+t5nezz6PXNvj78y+65LfeGzvrrYHnWxPepie/yJbVGXKbaHRVV0TaFQKIQOwaSwPepie9TF9vgT26IuU2wPVnRERGTRWNEREZFFY6IjIiKLxkRHREQWjYmOiIgsGhPdQ1RUVCAmJgazZs3CokWLhA5HcIWFhZgyZQr69+8vdCgmYePGjYiNjcXEiRPx/fffCx2O4C5cuICZM2di8uTJePfdd4UOxySoVCqMGjWKf38ASE1NRVhYGGbOnInExESjf3+zSXRz5syBTCaDRFJ3C77U1FSEhoYiKCgIsbGxUKnubA+flJSEMWPGYPHixThw4ABqamqECNtgGtoeXl5eWLlyJdzd3YUI1+Aa2h5PPvkkli9fjoSEBCQlJQkRskE1tD2Cg4Px7bffYtWqVcjMzBQiZINqaHsAwFdffYWnnnrK2KEaRUPbQyQSwdHRERUVFWjbtq3R4202iS4qKgpHjx6tc06tViM2NhaJiYm4ePEiSktLER8fDwDIzc2Fv78/AMDT0xPXrl0zesyG1ND2sHSNbY+PPvoI06ZNM2aoRtGY9ti2bRsiIyPRs2dPY4drcA1tj7S0NNja2iIkJESIcA2uoe3Rv39/bNmyBUuWLMH7779v9HibTaILDw+Ht7d3nXNpaWnw8fGBXC4HAEydOhXr1q0DUHcX9GvXrsHDw8O4ARtYQ9vD0jWmPebPn4+AgACMHj3aqLEaQ2PaY9iwYUhKSsKePXssrgekoe2xdetWXLhwAd988w2Sk5ORl5dn9JgNqaHtYWV1J9U4ODhAiKnbkkdfYrmUSiX8/Py0x/7+/trkFhkZCYVCgdTUVPTr1w9SqVSoMI2mvvaoqqrCnDlzcOLECSgUCsTFxQkVptHU1x7Lli3Dzz//jP79++PcuXOYN2+eQFEaT33tsX//fqxduxY1NTXo2rVrs//zcvc5ZWpqKtLT0+Hr6ytIjMZUX3skJSVh8+bNKC8vR3R0tNFja9aJrr5/Wdjb22PVqlVGjEZ49bWHjY0Nvv32WyNGI7z62mPatGkW2WVZn/raIywsDGFhYUaMRni6VCaDBg3CoEGDDB+MCaivPSIjIxEZGWnEaOpqNl2XD3Jv9yQA5OTkaPe2a47YHnWxPepie9TF9qjLlNujWSe6Hj16QKlUalfaXrFihaD/6hAa26
MutkddbI+62B51mXR7NGk3OzMyffp0ja+vrwaAxtfXVzNr1iyNRqPR7NixQ9OhQwdN27ZtNZMnT9bU1NQIHKlxsD3qYnvUxfaoi+1Rl7m1B3cvICIii9asuy6JiMjyMdEREZFFY6IjIiKLxkRHREQWjYmOiIgsGhMdERFZNCY6IhMkEonQpUsXdOrUCX379sXp06cbfa+srCwEBQXpMToi88JER2SCxGIx0tPTcerUKQwZMgSvvfaa0CERmS0mOiITFx4erl1D8Pbt2xg2bBi6d++O0NBQfPbZZ9rrBg0ahDfeeAN9+vRB27ZtkZycfN+9CgsL0adPH4vcLJboYZr17gVE5uC///0vnnnmGQCAtbU1EhMT4erqiurqaoSFhWHMmDHaDT5LS0tx6NAhHD16FOPHj9d+DrizyG5ERAQ+/fRTDB06VJDfhUgITHREJkilUqFLly4oLCwEAPz+++8A7myF8uGHH2LHjh3QaDRQKpU4ffq0NtFFRUUBALp3747s7Gzt/W7cuIEhQ4bghx9+QN++fY382xAJi12XRCbo7jO6nJwcDBs2DLNnzwYAJCQk4I8//sCRI0dw8uRJDB48GJWVldrP2djYALgzmEWtVmvPOzo6Qi6XY8uWLcb9RYhMABMdkQmTSCT4+uuvsWvXLpw+fRo3b96Eh4cHbGxscPnyZWzbtk3n+yQmJuLIkSNYsGCBgaMmMi1MdEQmztXVFa+99hoWLlyISZMm4cKFCwgNDcXs2bMxcOBAne9jbW2N5ORk7NmzB59++qkBIyYyLdymh4iILBorOiIismhMdEREZNGY6IiIyKIx0RERkUVjoiMiIovGREdERBaNiY6IiCwaEx0REVm0/wctsmosXdTnIwAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 333;\n", + " var nbb_unformatted_code = \"plt.plot(ranks, freqs)\\n\\ndecorate(\\n title=\\\"Zipf plot\\\", xlabel=\\\"Rank\\\", ylabel=\\\"Frequency\\\", xscale=\\\"log\\\", yscale=\\\"log\\\"\\n)\";\n", + " var nbb_formatted_code = \"plt.plot(ranks, freqs)\\n\\ndecorate(\\n title=\\\"Zipf plot\\\", xlabel=\\\"Rank\\\", ylabel=\\\"Frequency\\\", xscale=\\\"log\\\", yscale=\\\"log\\\"\\n)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.plot(ranks, freqs)\n", + "\n", + "decorate(\n", + " title=\"Zipf plot\", xlabel=\"Rank\", ylabel=\"Frequency\", xscale=\"log\", yscale=\"log\"\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Zipf's law suggests that the result should be a straight line with slope close to -1.\n", + "It's not exactly a straight line, but it's close, and the slope is about -1.1."
+ ] + }, + { + "cell_type": "code", + "execution_count": 334, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-5.664633515191604" + ] + }, + "execution_count": 334, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 334;\n", + " var nbb_unformatted_code = \"rise = np.log10(freqs[-1]) - np.log10(freqs[0])\\nrise\";\n", + " var nbb_formatted_code = \"rise = np.log10(freqs[-1]) - np.log10(freqs[0])\\nrise\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rise = np.log10(freqs[-1]) - np.log10(freqs[0])\n", + "rise" + ] + }, + { + "cell_type": "code", + "execution_count": 335, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5.180166032638616" + ] + }, + "execution_count": 335, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 335;\n", + " var nbb_unformatted_code = \"run = np.log10(ranks[-1]) - np.log10(ranks[0])\\nrun\";\n", + " var nbb_formatted_code = \"run = np.log10(ranks[-1]) - np.log10(ranks[0])\\nrun\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" 
+ ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "run = np.log10(ranks[-1]) - np.log10(ranks[0])\n", + "run" + ] + }, + { + "cell_type": "code", + "execution_count": 336, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "-1.0935235433575892" + ] + }, + "execution_count": 336, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 336;\n", + " var nbb_unformatted_code = \"rise / run\";\n", + " var nbb_formatted_code = \"rise / run\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "rise / run" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The Zipf plot is a well-known visual representation of the distribution of frequencies, but for the current problem, we'll switch to a different representation." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tail Distribution\n", + "\n", + "Given the number of times each word appears in the corpus, we can compute the rates, which are the number of times we expect each word to appear in a sample of a given size, and the inverse rates, which are the number of words we need to see before we expect a given word to appear.\n", + "\n", + "We will find it most convenient to work with the distribution of inverse rates on a log scale.\n", + "Here are the inverse rates:" + ] + }, + { + "cell_type": "code", + "execution_count": 267, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 267;\n", + " var nbb_unformatted_code = \"def describe(seq):\\n return pd.Series(seq).describe()\";\n", + " var nbb_formatted_code = \"def describe(seq):\\n return pd.Series(seq).describe()\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "def describe(seq):\n", + " return pd.Series(seq).describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 268, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 151414.000000\n", + "mean 6.604409\n", + "std 221.228503\n", + "min 0.112490\n", + "25% 0.112490\n", + "50% 0.224980\n", + "75% 0.674939\n", + "max 51969.280382\n", + "dtype: float64" + ] + }, + "execution_count": 268, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 268;\n", + "
var nbb_unformatted_code = \"n = counter.total()\\nrates = np.array(freqs) / n\\ninverse_rate = 1 / rates\\ndescribe(inverse_rates)\";\n", + " var nbb_formatted_code = \"n = counter.total()\\nrates = np.array(freqs) / n\\ninverse_rate = 1 / rates\\ndescribe(inverse_rates)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "n = counter.total()\n", + "rates = np.array(freqs) / n\n", + "inverse_rates = 1 / rates\n", + "describe(inverse_rates)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And here are their magnitudes, expressed as logarithms base 10."
+ ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 151414.000000\n", + "mean 6.341648\n", + "std 0.673543\n", + "min 1.285082\n", + "25% 6.062383\n", + "50% 6.456414\n", + "75% 6.758455\n", + "max 9.275781\n", + "dtype: float64" + ] + }, + "execution_count": 278, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 278;\n", + " var nbb_unformatted_code = \"mags = np.log10(inverse_rates)\\ndescribe(mags)\";\n", + " var nbb_formatted_code = \"mags = np.log10(inverse_rates)\\ndescribe(mags)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "mags = np.log10(inverse_rates)\n", + "describe(mags)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the `empiricaldist` library, we'll use the `Surv` object, which represents survival functions, but we'll use a variation of the survival function which is the probability that a randomly-chosen value is greater than or equal to a given quantity.\n", + "The following function computes this version of a survival function, which is called a tail probability." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 279, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 279;\n", + " var nbb_unformatted_code = \"from empiricaldist import Surv\\n\\n\\ndef make_surv(seq):\\n \\\"\\\"\\\"Make a non-standard survival function, P(X>=x)\\\"\\\"\\\"\\n pmf = Pmf.from_seq(seq)\\n surv = pmf.make_surv() + pmf\\n\\n # correct for numerical error\\n surv.iloc[0] = 1\\n return Surv(surv)\";\n", + " var nbb_formatted_code = \"from empiricaldist import Surv\\n\\n\\ndef make_surv(seq):\\n \\\"\\\"\\\"Make a non-standard survival function, P(X>=x)\\\"\\\"\\\"\\n pmf = Pmf.from_seq(seq)\\n surv = pmf.make_surv() + pmf\\n\\n # correct for numerical error\\n surv.iloc[0] = 1\\n return Surv(surv)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from empiricaldist import Surv\n", + "\n", + "\n", + "def make_surv(seq):\n", + " \"\"\"Make a non-standard survival function, P(X>=x)\"\"\"\n", + " pmf = Pmf.from_seq(seq)\n", + " surv = pmf.make_surv() + pmf\n", + "\n", + " # correct for numerical error\n", + " surv.iloc[0] = 1\n", + " return Surv(surv)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's how we make the survival function." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 318, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 318;\n", + " var nbb_unformatted_code = \"surv = make_surv(mags)\";\n", + " var nbb_formatted_code = \"surv = make_surv(mags)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "surv = make_surv(mags)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And here's what it looks like." + ] + }, + { + "cell_type": "code", + "execution_count": 319, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAboAAAD/CAYAAACHFRPuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAAwRElEQVR4nO3deVzU1f4/8Ncw4AaIiAvIDKK4DtuIhNcrCVfN5ZKiRqa4kILZDdOW7y3LrnXLrpq2Gprl1hX154IShVlqIS5dI5VcJnEL2QQVEdxAmTm/P8hPsswMIsPAzOv5eMwjZubM5/OekebFOZ/z+RyZEEKAiIjIQtmYuwAiIiJTYtAREZFFY9AREZFFY9AREZFFY9AREZFFY9AREZFFY9AREZFFszV3AXXRunVrKBQKc5dBRESNRE5ODkpKSmp8rkkGnUKhgEajMXcZRETUSKhUKr3PceiSiIgsGoOOiIgsWpMcuiQiosqs5bLFMpnsgV9j0qCbPXs2EhISkJ+fj/Ly8hrbpKSkIDY2FmVlZQgNDcWKFSsgl8tNWRYRkcW4e/cusrOzUVZWZu5SGkTz5s2hVCphZ2dX69fITLl6wf79+9GtWzcoFIoag06n06FHjx5ISkqCSqXCuHHjEBYWhqioKIPbValUnIxCRATg/PnzcHR0hIuLS516O02JEAKFhYW4fv06unbtWuk5Q7lg0mN0wcHBcHV11ft8WloaOnXqJM2WiY6ORkJCgilLIiKyGEIIlJWVwcXFBTY2NpDJZBZ9s7GxgYuLC8rKyh5oqNasx+hycnKgVCql+x4eHsjOzm6Qfet0At9r8tHbrTVsZDLc+0Po/r+I7v1076FynQ47juVjhK8rTuVfhwyAr7sTbP5ooIPAybwSqNxa47eL1+HdqTVkMuBkbglUnRxxMrcEkAE+7k7Q5JXA190JAHAyrxg+f2xHqxP46mguPNvbQ24jg6pTa3ydnodOzi2Q/OtFvDKiBxbtyMATfZXo27kthBDYejgLedfKMCOkC1aknIOrY3Ok5xajj9IZWp0W24/kwEYux9gABZ7sq0RCeg7yrt5Gfy8XJB7JhdqjDXRChx80BXC2b472js1x5UYZnFvIsTvjMtQKZ8htABsbG/grnLDnVAF0OgEhBIpv30WpVsDHrQ36eDhhl+YiLpaUoV0LOX67VAqn5kDhbeDufZ+9o23FLa/U8L+RDYCU/wuGRzunh/73JrJklt6Tu1+jO0ZnTG0TOS4uDnFxcdL9oqKih953es41vJv8G94J94GPwumPeu6rDeLeD5Kk4/lY/F0Gcotu49uT+ZABeHeMD3zcnSAEcCK3GG9/o8HEfh5YfygLb4T1BgDMT/4NkUEeWHMwEwAw9a+e0vMCAu8mn8LcsF5QuTnhm2N5+GDXabRqJkdLOzmG9O6ITb9kQy4D7uqAtMwi5JeUYc+pK1j0hC9+v3ITH+4+AwA4eO4Kfsu/IdWbfOJSpfes2ZGBg+ev4odTlwEAaw5egFYAX58oMPhZ7Tx1Rfr56yrbvOdC0SUka/58LuuPaLt9u3rb6+UVN2N0AAYu2W+84QNYNqUH/q7qXq/bJKLqUlJSMH/+fOzevVtvm7fffhvz5s0zeS1mDTqlUlmpB5eVlVXjFU9iY2MRGxsr3Td0YmBtqRVt8PGEPlAr2sDGpnZ/IUT/tQtat7DDE2p3jOrTCTIB9PFwll7v2roF2jk2h18nJ/Tr6gK1og0AoEPrFvDr5ISgrm0hE4C/og3+4vXn865OLaU6nh3oBWf7ZujW3h62Njbw7eQEH4UTPNq2wLYjeXjz773xVvJvGN9XiSAvF+h0Am3s7ZBXdBvPh3TD0r1n0cGhGX7NuVbRoxNabPslGzZyW0QEKDH+EQ9sOpqN3Ku3ENytHRKO5ECtaAMddNijKYBzq2bo6Ngcl6/fgXNLG3x/ynw9OlN47r+nAZw22GZyH+Cdp8IapiAiK9ZQQWfSySj32Nra1jgZRavVonv37vj
mm2+kySgjRozA1KlTDW6Pk1Es35XiWwhc8KO5y6jRMHdgxfMMQjI/IQROnTqFXr16NYrhy/Xr1+Pf//43HB0dMXDgQBw/fhyLFi3CrFmzcOvWLQgh8N5772Ho0KF48cUX8dFHH8Hf3x/t2rXD7t278fzzz+N///sfSktL4e/vj1WrVqF58+aV9qHvPRvKBZMG3YwZM5CcnIzc3Fy4u7sjPDwcU6dOxbx587Bjxw4AwA8//ICZM2eirKwMISEh+Pzzz2Fra7ijyaCjB/XD6d8xbbVpf2c+iOyKsX69TboPovtV/dLX6gQuXzd+moFOCGjySqDq1FqaY2BIe8fmkBsZ+crPz0dAQAAOHz4MV1dXREREoLi4GNu2bUPLli1hZ2eH3NxcDBw4EOfOnQNQvRNUWFgIFxcXAMCsWbPg7e2NGTNmGHzP9xjKBZMOXa5YsaLGx++FHAAMGjSIoUUmN6hHF2Qu7GKwTXm5DmFvfIuMOu7jpQ3n8dKG85UeS4ztC7VS/8xjInPQ5JXgra9P4q2R3vBxr5/JXocOHUJwcDDc3NwAAFOmTMHSpUtx48YNxMTEQKPRwNbWFtnZ2bhy5QratWtXbRtfffUVli9fjtLSUhQXF0On09VLbbwyCtEfbG1t8N1C/UOSOzRn/jjGV3uj4w5Xus/JMGRKchsZXJ1aGG3XwbE52jo0e6A5CnU1d+5cBAQEYNOmTZDJZHBxcUFpafUD9JmZmXjrrbdw+PBhtG/fHkuXLsWvv/5aLzUw6Ihq6e+q7shcWD2kpn+cjF0Xa7eNqpNhMg0EK5Gp2NjIEODhXK/b7NevH5577jnk5+ejY8eOiI+PBwAUFxfD3d0dMpkMW7duxdWrV6XXtGrVCjdv3oS9vT1KSkrQsmVLODs749atW1i3bh38/PzqpTYGHdFD+mJ29bDqOScZtbkgk+ecZOlnhh41Za6urnjvvfcwcOBAODo6IiQkBEVFRXjttdcwZcoUvP/++3j00Ufh4eEhvWbmzJno27cvFAoFdu/ejdDQUPTq1Qvt27dHUFBQjT2/umiQWZf1jZNRqKm5P9Bqg6FHtdHYZl02hEY3GYWIKtwfXCv/dxjzE/MNtr8XjDYAzjP0iB4Kg46ogcX8pS9i/lLx8+6M84hZ85vetjr8GXrx030R7OWhty0R1YxBR2RGQ3p2RebCiquwf7b/EBZ+c0Vv20lfHAdwHAOcgPWvsZdHVFtcYZyokXg2uB8yF4Yhc2EYAg3MED9QXNHLe9DjfmS5muBUizqry3tlj46oEdr61p89NkOBdu85Tl6xTjKZDM2bN5euKGLpE1LurUfXvHnzB3qvnHVJ1ETUpgfHwLM+XGG8gtmudWkqDDqyZqo5ybhlpA0Dz/o0wa/yOtHXkzPbCuNEVP80fxzHszPQxnNOMhZ/q38dMLI85l79u6FudcGgI2qizvwReB30PB+3t4wTVojAoCNq8n7+I/D08ZyTjJiPGHhkvRh0RBYi00Dg7c5/8MuQEVkKBh2RhclcGIbOep7znJOM85eKGrQeInNj0BFZoL0GeneDPjjI3h1ZFQYdkQXLXBiG1nqeY9iRtWDQEVm4YwZ6dww7sgYMOiIrYSjszhZcrfE5IkvAoCOyIvrCbsiHPyGEvTuyUAw6IiuTuTAMPjU8fgEcyiTLxKAjskLf8LgdWREGHZEVY9iRNWDQEVk5hh1ZOgYdETHsyKIx6IgIAMOOLBeDjogkDDuyRAw6IqpEX9i9uIphR02TSYMuJSUF3t7e6NatG2JiYqDVaqu1WbJkCby9veHn54fhw4ejoKDAlCURUS3UFHbbzwCZl681fDFED8lkQafT6RATE4MtW7bg7NmzKCkpQXx8fKU2Z86cwfLly/HLL7/g2LFjUKvVWLJkialKIqIHUFPYhb5/wAyVED0ckwVdWloaOnXqBJVKBQCIjo5GQkJCpTZCCNy9exe3b9+GEAIlJSVwc3MzVUlE9IBqCjser6OmxmRBl5OTA6V
SKd338PBAdnZ2pTY9evTA888/D6VSCTc3N5w8eRKzZ882VUlEVAcMO2rqTBZ0QgijbQoLC7Ft2zacPXsWubm56Nq1KxYvXlytXVxcHFQqlXQrKuIKyUQNSVbDYww7aipMFnRKpbJSDy4rKwsKhaJSmx9++AFdu3aFm5sb5HI5nnzySRw8eLDatmJjY6HRaKSbs7Ozqcomohr8rmcmJlFTYLKgCwwMRE5ODjQaDQBg1apVGDt2bKU2nTt3xqFDh1BSUgIA2LVrl3RMj4gaFw5hUlNlsqCTy+VYuXIlIiIi4OXlBQcHB0yePBlJSUmIiYkBAAQFBSEqKgpBQUHw9fXFhQsXMGfOHFOVREQPiWFHTZFM1OZgWiOjUqmkniIRNax/bUrGuqOVH/vfqyFwdXYwT0FEMJwLvDIKET2Qd56q3qv7y6K9ZqiEqHYYdET0wDiESU0Jg46I6kTfNTGJGhsGHRHVG/bqqDFi0BFRndXUq3vqHYYdNS4MOiJ6KPIq9w/dNEsZRHox6IjooZzjxBRq5Bh0RPTQODGFGjMGHRGZBHt11Fgw6IioXrBXR40Vg46ITIa9OmoMGHREVG/Yq6PGiEFHRCbFXh2ZG4OOiOoVe3XU2DDoiMjkvj91ztwlkBUzGnRTp07FTz/91BC1EJGFqNqre2btKTNVQlSLoBs6dCjmzp0LHx8ffPjhhygsLGyIuoiIiOqF0aCbMGECfvjhB3z11Ve4fPky+vTpg/HjxyMlJaUByiOipqpqr46TUshcan2M7vz58zhz5gzs7e3Rq1cvvPPOO4iKijJlbURERA/N1liDd955B+vWrYOfnx9mzJiBxx57THque/fuJi2OiCzLmH8lY/s7nJVJDcto0NnY2CA1NRWurq7Vnvvuu+9MUhQRWYbMhWGVhiyP3jVjMWS1jA5dnjlzplrIPf300wCArl27mqQoIiKi+mI06I4dO1btscOHD5ukGCKyPJyUQuamd+hy6dKl+OSTT5CTk4MePXpIj1+/fh3Dhg1rkOKIiIgelt6gmzJlCkaOHImXXnoJH374ofS4o6Mj2rZt2yDFERERPSyZEEKYu4gHpVKpoNFozF0GET2AqkOWvCYm1SdDuaC3Rzd69GgkJiaie/fukMlk0uNCCMhkMpw+fbr+KyUiIqpneoNu+fLlAIDdu3c3WDFERET1jUOXRNRgOHxJplKnocuqQ5b3PMjQZUpKCmJjY1FWVobQ0FCsWLECcrm8UpuCggI888wzyMjIgBAC77//Ph5//HGj2yYiIqoNvUH3sEOWOp0OMTExSEpKgkqlwrhx4xAfH1/t+phRUVGYNm0axo0bh/LychQXFz/UfomIiO6n94Txzp07G7wZk5aWhk6dOkGlUgEAoqOjkZCQUKlNRkYGCgoKMG7cOACAra0tXFxcHub9EFEjxpPHyRxMNusyJycHSqVSuu/h4YHs7OxKbU6dOoUOHTpgwoQJOHXqFHx9ffHRRx/xPD0iIqo3Jpt1WZs5LuXl5UhNTUVaWhp8fHwwb948/POf/8SqVasqtYuLi0NcXJx0v6ioqE41ERGR9dE7dOnm5gagYgjT3t4eGo0GGo0GDg4OtRq6VCqVlXpwWVlZUCgU1dqoVCr4+PgAAMaPH1/jdTRjY2Ol/Ws0Gjg7O9fu3RFRo8PhS2poRi/qvGnTJvj6+mLlypX44osv4Ofnh82bNxvdcGBgIHJycqTpnqtWrcLYsWOrtblz544UiLt27YK3t3dd3gcREVGNjK5HN2/ePKSlpUm9sdzcXAwePFiaQKKPXC7HypUrERERgbKyMoSEhGDy5MlISkpCUlISVq5cCRsbGyxbtgzh4eEoLy+Hu7s7Vq9eXT/vjIiICLU4Ybxv377VhhNreqwh8YRxoqaPJ49TfarTCeMHDx4EAISEhGDChAmYMmUKZDIZ1q1bh9DQUJMUSkREVN/0Bt3cuXMr3X/vvfekny9evGi6ioiIiOqR3qD78ccfG7IOIrIymQv
DOOOSGoTRySgAcPr0aZw4cQKlpaXSY5GRkSYrioisj+ecZB6nI5MwGnTvv/8+Nm3ahPPnzyM0NBS7du1CaGgog46IiJoEo+fRrV27FgcOHIC7uzu2bt2K9PR06HS6hqiNiIjooRkNuhYtWsDOzg4ymQzl5eXo0qULMjMzG6A0IrJ0vEoKNQSjQ5fOzs4oKSnBoEGD8OSTT6Jdu3a86DIRETUZRnt0SUlJcHBwwOLFizFmzBj4+fnhq6++aojaiIiIHlqthi6vXr2K77//Hu3bt0dkZCTatGnTAKURkTXgTEsyNZNd1JmIqC54nI7qm8ku6kxERNQYGO3ROTg4VFpHzt3dHfb29iYtioisC1eYJFPSG3QHDx7EwYMHpYs6f/vtt9i5cycmTpzIizoTUb06WuU4XVcOX1I94kWdiajR4SUpqD7xos5ERGTRjB6j0+l0WLFiBcaPH4/x48fjiy++4CXAiKje8TQDMhWjQffCCy9g586dmDBhAiIjI7Fz50688MILDVAaEVkznmZA9cXo6QV79+7Fr7/+Kt1//PHHoVarTVkTERFRvanV0GVJSYl0/8aNGxBCmLQoIiKi+mK0R/fcc88hMDAQY8aMAQAkJibipZdeMnlhRGR9uOo4mYLBHp0QAqNGjcKWLVvg7u4OhUKBzZs3Y8aMGQ1VHxFZMYYe1QeDPTqZTIbhw4fj+PHj8Pf3b6iaiIiI6o3RY3S9evXC6dOnG6IWIiKiemf0GF1ubi78/f0REBBQ6RqX33//vUkLIyLrxON0VN+MBt2CBQsaog4iohp5zknmyeT0UIwGXUhICMrLy3HmzBkAQPfu3WFra/RlREREjYLRY3QpKSno2rUrJk2ahIkTJ6Jbt27Yu3dvQ9RGRFZq9TSVuUsgC2I06GbOnImkpCQcPnwYR44cwVdffYXY2NiGqI2IrNSgHl0q3Z/wLo/ZUd0ZDTo7O7tKl/zy9/eHnZ1drTaekpICb29vdOvWDTExMdBqtXrbhoWFoVu3brXaLhFZl5+um7sCasqMBt2wYcMwZ84c/Pbbbzh16hRef/11jBgxAnl5ecjLy9P7Op1Oh5iYGGzZsgVnz55FSUkJ4uPja2y7fv16tG3btu7vgoiISA+js0o2bdpU6b/3bNy4ETKZDOfPn6/xdWlpaejUqRNUqoqx9ujoaMTFxSEqKqpSuytXriAuLg6rV6/G448/Xqc3QUSWh6cZUH0xGnS///57nTack5MDpVIp3ffw8EB2dna1di+88ALmz5+PFi1a1Gk/RGQdeJoB1ZXRocu6qs0KB99++y3kcjkGDRpksF1cXBxUKpV0Kyoqqq8yiYjIwpnshDilUlmpB5eVlQWFQlGpTWpqKvbs2QNPT0+Ul5ejoKAAfn5+OHbsWKV2sbGxlWZ63hsOJSIiMsZkPbrAwEDk5ORAo9EAAFatWoWxY8dWarNgwQLk5OQgMzMT+/fvR+fOnauFHBFZLw5VUn0wWdDJ5XKsXLkSERER8PLygoODAyZPnoykpCTExMSYardEZME4OYXqQib0HEzr3r07ZDJZtceFEJDJZGZd0UClUkk9RSKybFXDjb08qomhXNB7jG737t0mK4iIiKih6A26zp07N2QdREQ1qno+3ctrkvH+VPbqqPb0Bt3o0aORmJhYbQizMQxdEpH1SsgA3jd3EdSk6A265cuXA+AQJhERNW16g87NzQ0AhzCJyPx4OTB6GEZPL0hPT8df//pXtG7dGs2aNZNuRETmwtCjB2H0yij/+Mc/sGzZMkybNg379u3DZ599hrt37zZEbURERA9Nb4/uwIEDAIA7d+6gT58+KC8vh4ODA/7v//4PCQkJDVYgERHRw9AbdM8//zwASMOUnTt3xqZNm3DgwAEUFxc3THVERH+oeqL4/zt6wkyVUFNjdOjyzTffRHFxMZYsWYLnnnsOJSUlWLp0aUPURkSk15xNFzC+j4+5y6AmQG/QZWVl4ZlnngEAbNu2DQDg5eUl3R8
+fHgDlEdERPRw9Aadvb09BgwY0JC1EBEZxNMMqC70Bp2LiwuioqIashYiogfCVcepNvRORqnNCuFERESNnd6g27dvX0PWQUREZBJ6g87BwaEh6yAiqpWqQ5U8ZkfGmGyFcSIiosaAQUdERBaNQUdETU7V4cudv501UyXUFDDoiKjJe/bLDHOXQI0Yg46IiCwag46ImqSqw5eavMtmqoQaOwYdEVmEv3/ys7lLoEaKQUdERBaNQUdETVbV4csNR46bqRJqzBh0RGQxXt+cZe4SqBFi0BERkUVj0BFRk1Z1+FLNa19SFQw6IrIo18xdADU6Jg26lJQUeHt7o1u3boiJiYFWq630fHp6OgYMGABvb2/4+Pjgk08+MWU5RERkhUwWdDqdDjExMdiyZQvOnj2LkpISxMfHV2rTqlUrrF69GidPnsTBgwexdOlSpKenm6okIrJQXLqHDDFZ0KWlpaFTp05QqVQAgOjoaCQkJFRq06NHD/Ts2RMA0Lp1a/Tu3RvZ2dmmKomIiKyQyYIuJycHSqVSuu/h4WEwxM6dO4dffvkFAwYMMFVJRERkhUwWdEKIWre9du0aRo8ejY8//hht27at9nxcXBxUKpV0Kyoqqs9SicgCcPiS9DFZ0CmVyko9uKysLCgUimrtbt26hbCwMEyfPh1PPvlkjduKjY2FRqORbs7OzqYqm4iILIzJgi4wMBA5OTnQaDQAgFWrVmHs2LGV2ty9exdjx47FY489hlmzZpmqFCKyUpMWsldHJgw6uVyOlStXIiIiAl5eXnBwcMDkyZORlJSEmJgYAMDmzZuxa9cuJCYmQq1WQ61WY+vWraYqiYgsXNXhy/3XzFMHNS4y8SAH0xoJlUol9RSJiO5X9dhc1fAjy2QoF3hlFCKyKJyUQlUx6IiIyKIx6IjI4rFXZ90YdERkcXhcju7HoCMiq8BenfVi0BGRRWKvju5h0BGR1WCvzjox6IjIYrFXRwCDjoisDHt11odBR0QWjb06YtARkdVhr866MOiIyOLV1Kt7ZzvDzlow6IjIKrSpcn/VIXNUQebAoCMiq5BeQ6+OQ5jWgUFHRFajpiHMbcd+M0Ml1JAYdERk1V7acN7cJZCJMeiIyKrU1KvjEKZlY9ARkdVh2FkXBh0R0R8YdpaJQUdEVknfFVMWJX/fwJWQqTHoiMhq1RR2y/fdxen8QjNUQ6bCoCMiq1ZT2A396H94cwuHMS0Fg46IrF5NYfflYR6zsxQMOiIi6D9mx7Br+hh0RER/MBR2SScyGrgaqi8MOiKi++gLu1nxZ9m7a6IYdEREVRharNVzTjIDr4lh0BER1SBzYVitAu+bk6cbsCqqCwYdEZEBmQvD4Gng+Znrzkihd+16aUOVRQ/ApEGXkpICb29vdOvWDTExMdBqtdXabN68GT169ICXlxfmzp1rynKIiOok5Y/e3eCOhtup390jhZ7nnGRkXr7WIPWRYTIhhDDFhnU6HXr06IGkpCSoVCqMGzcOYWFhiIqKktoUFxfD19cXhw4dQvv27TFw4EAsWLAAISEhBretUqmg0WhMUTYRUa08zHG62JDm+OeIIfVYDRnKBVtT7TQtLQ2dOnWCSqUCAERHRyMuLq5S0O3cuROhoaFwc3MDAERFRSEhIcFo0BERmdv9x+8eNPTi9pYhbq9pJrS0tAF6u7ZC4e1y+Lm3xeIIf7RoYbKv+ibBZO8+JycHSqVSuu/h4YHs7GyjbXbu3GmqkoiITKLqpJWBc5KRZaZabuuAI3m3AAAXivKRcjofj3Zvh+t3dBjUuwPyi0qRW3QTWUW38YhnW9jJ5VC2bQWtEDiSWYQOTs3RxcUBXTvYY8PBTBSVlSP2b93QTC7HmYIb6N7RATYyGcp1OuzRFEDZthV6ubWGjUwGANAJUamdITohcO3WXYT0aA8bG8NtH4bJgq42I6K1HTWNi4tDXFycdL+oqKjOdRERmVqqntmapy5ewfCPD5l03/d6dJnXynD1lhbX7wA
7Tl4BABw6X4Q72j+/d4/l3oAMgGMLW5TrBG7d0UIGoK19Mwzs0R5fHS8AALjYt8Dk/p2xLOUsPnhKDX9lG2z8OQsrUs+jTatm+CIqEP7KNgCAI1lFldoZciSrCP/++iQ+eEqNAA9nE3waFUwWdEqlslIPLisrCwqFolqbY8eOGWwDALGxsYiNjZXu3xsOJSJqSnq5tTN4ykJ9Ki/X4b+Hfseek/no0d4BV2+V48lAJc4V3sCFy9dxOv8mBvXuCDs7G/To6AidEEg5dQnubVqhd6fW8HVzQjt7W1wuuYMF4T5o1kyOD55SQ61oAwB4MkABnRDo0cFBegwA1Io2ldoZ8iBtH4bJJqNotVp0794d33zzjTQZZcSIEZg6darUpri4GD4+Pvj555+lySjvvvsu/va3vxncNiejEBHR/QzlgslOL5DL5Vi5ciUiIiLg5eUFBwcHTJ48GUlJSYiJiQEAODk5YfHixRg4cCB69uyJkJAQoyFHRET0IEzWozMl9uiIiOh+ZunRERERNQYMOiIismgMOiIismgMOiIismhNcjJK69atazzfrqioCM7OpjvpsL6xXtNqavUCTa9m1mt6Ta1mc9Wbk5ODkpKSGp9rkkGnT1Objcl6Taup1Qs0vZpZr+k1tZobY70cuiQiIovGoCMiIotmUUF3//UwmwLWa1pNrV6g6dXMek2vqdXcGOu1qGN0REREVVlUj46IiKiqJh90s2fPhkKhgK1t01hBNzs7G4MHD0bv3r3h7e2N1157zdwl1crQoUOhVqvh6+uLiIgIvdN4G5vY2Ngm8bvh6ekJb29vqNVqqNVqHD9+3NwlGXXz5k1ERUWhZ8+e6NWrF1asWGHukvS6dOmS9Nmq1Wq4urpizJgx5i7LoPj4ePj5+UGtVuPRRx9FRkaGuUsy6LPPPoOPjw+8vb3x3HPPQavVmrukP4kmbt++feLixYtCLpebu5RaycvLE2lpaUIIIcrKykRwcLBITEw0c1XGXbt2Tfp59uzZ4s033zRfMbWUmpoqpkyZ0iR+Nzp37iyys7PNXcYDmTFjhli0aJEQQgidTicKCgrMXFHtDR48WKxfv97cZeh18+ZN0bZtW3H58mUhhBDLly8XERERZq5KvxMnTggvLy/pe2LWrFniyy+/NHNVf2ryPbrg4GC4urqau4xac3NzQ2BgIACgWbNm6NOnD7KyssxclXFOTk4AAJ1Oh9LSUshkplv2vj6UlZVhzpw5WLJkiblLsUjXr19HUlISXnrpJQCATCZDhw4dzFxV7eTl5eGXX37B6NGjzV2KXjqdDkII3LhxA0DF2p1ubm5mrko/jUaDfv36Sd8Tw4YNw6ZNm8xc1Z+afNA1ZVevXkViYiIee+wxc5dSK2PGjEGHDh2QkZGBl19+2dzlGPT2228jOjoa7du3N3cptTZy5Eio1WrMnTsXd+/eNXc5Bp0/fx4dO3bEzJkzERAQgDFjxuDChQvmLqtWNm7ciNGjR6NVq1bmLkUvBwcHfPrpp/Dx8YG7uzu+/PJLvPPOO+YuSy8/Pz/s378feXl50Gq12LJlC7Kzs81dloRBZyZ37txBREQEZs+ejV69epm7nFrZvn078vLyoFAosHXrVnOXo9exY8dw6NChSqvZN3b79u3D0aNHceDAAWRkZDT6nmh5eTnS09MRERGBI0eOYOTIkZg2bZq5y6qV+Ph4TJo0ydxlGHT37l0sW7YMaWlpyM3NRUREBF599VVzl6VXz5498e6772LUqFEIDg6GUqmEXC43d1kSBp0ZaLVaREZGQq1WN/qeUVXNmjXD+PHjsX37dnOXoteBAweg0WjQpUsXeHp6QqvVwtPTs1FPoFEqlQAAe3t7xMTE4ODBg2auyDCFQgEXFxcMGTIEADB+/HgcPnzYzFUZp9FocPnyZQwaNMjcpRiUnp4OIQR69+4NoOLzbey/E5MmTcIvv/yCn376CX369GlUf8Az6MzgmWeegaOjI95//31zl1Ir169
fx8WLFwFUHDtISkqCt7e3mavS7x//+Afy8vKQmZmJzMxMyOVyZGZmonXr1uYurUY3b96UQlir1SIhIQF+fn5mrsqwjh07wtvbG0eOHAEA7Nq1q1H/Ttyzbt06REZGwsamcX/1KRQKZGRkIDc3F0DF56tSqcxclWEFBQUAgJKSEixatAjPP/+8mSv6U5M/YXzGjBlITk5Gbm4u3N3dER4ejri4OHOXpdeBAwcQHBwMHx8fqWs/bdo0zJo1y8yV6ZeXl4fw8HCUlZVBp9OhX79+WLp0aaM+xnE/W1tblJeXm7sMvc6fP4+xY8dCp9NBq9Wif//++OSTTxr956vRaBAdHY2bN2+iTZs2WLFihdQDaYyEEPD09MTXX3/d6P+QAICVK1figw8+gJ2dHdq3b4/Vq1fDw8PD3GXp9be//Q0FBQUQQuCf//xnoxrKbvJBR0REZEjj7r8TERE9JAYdERFZNAYdERFZNAYdERFZNAYdERFZNAYdERFZNAYdNYllbB5EYmIijh07VqfXDh06VDpJ19PTEzk5OXWuY+HChfDy8oJMJqu2nY8//hjdu3dHt27dzHLe59NPP434+PgG36+1W7hwIf773/+auwyrw6CjBlGfJ2wb21Zdg27Pnj1wd3eHu7t7XUurZPDgwfjhhx/QuXPnSo+fOXMGy5Ytw9GjR3H06FF89NFH+P333+tlnzVpDCfLN4YaDGmo+mJjY7F48WLw9OWGxaCjSmxtbfH2229DrVbDz88Pp0+fhhACXbp0kXo6ABAWFobvv/8eALB06VIEBQXB398fMTEx0pX3PT09MWfOHAQGBmLp0qXYvn27tJCkn5+fdLX7lJQUBAcHo2/fvhg6dGiNVz1fu3YtHn/8cQwbNgw+Pj4AgIiICAQGBsLHxwcvvvgiAODHH39EUlIS5s6dC7VajYMHD+L27dt49tlnERQUBF9fX3z66ac1vvcvv/wSERERNT63f/9+BAYGws/PD2FhYcjPzwdQsQLF3//+d3h7e+OJJ55Av379sH//fgDAI488Ui3kAGDbtm146qmn4ODgAEdHR0RERFS7dqhOp0OnTp2kq9G0bdtW6vm99dZb0nvYvn07/P394evri8jISFy/fh1ARY9txowZ6N+/P6KiolBaWorJkyejV69eGDZsGC5fvizta968efD29oafn5/elTRsbW3x+uuvw9fXFwEBAdLCsDqdDnPnzkVQUBD8/Pzw+uuvV3rNG2+8gT59+mDbtm2Vtnf48GEMGDAAffr0gVqtln6XDO1r7dq1GDlyJIYMGYKePXsiKioKd+7cAQBcuHABjz/+OAIDAxEYGIi9e/fWaj/317dmzRoEBQWhT58+CA0Nlf74SElJwYABAxAZGQmVSoURI0agtLQUAHDlyhU89dRT8PPzg5+fHzZu3Aig4sLigwYNQt++fREcHCy9B0dHR3h5eWHfvn01fs5kIuZaCI8aj/sXJgUgNm7cKIQQYvHixSImJkYIIcSrr74qlixZIoQQ4sqVK0KhUIjy8nKxZ88eMWnSJKHVaoUQQsTGxoply5YJISoWE/3Xv/4lbdvX11fk5eUJIYS4deuWuH37tigsLBT9+/eXFmzcvHmzGDduXLUa16xZI9q1ayfy8/Olx65cuSKEEEKr1Yrw8HCxc+dOIYQQUVFRYt26dVK7f/3rX2LFihVCCCFKS0tF3759xcmTJ6vtw9PTs9L27y2GWlpaKhQKhTh8+LAQQoglS5ZINc6aNUu8/vrrQggh0tPThVwuF/v27au03aqLqs6cOVN8/vnn0v1ly5aJF154oVo9o0aNEnv37hXp6ekiKChIPPnkk0IIIUJCQsTx48fFxYsXhaurq7hw4YK03VdeeUX6DAYNGiTu3LkjhBDigw8+EBMmTBA6nU5kZWWJ1q1bi3Xr1onCwkLRu3dv6d/v6tWr1eoQouL3Ii4uTgghRGJioujbt68QQohVq1ZJ71+r1YqRI0eKHTt2SK9ZtWpVjds
rLi6WasvJyRFdu3Y1uq81a9aINm3aiJycHKHT6cTYsWPF0qVLhRBCDBo0SJw4cUIIIcSFCxdEly5dhE6nM7qf++u79/skhBAJCQli/PjxQgghfvzxR9GqVStx7tw5IYQQYWFhIj4+XgghRGRkpHj77bel1xUWFoo7d+6Ifv36iZycHCGEED///LMICgqS2syfP1/Mnz+/xs+FTMOyDs7QQ5PJZHjiiScAVPRIdu7cCQCYOHEioqKi8PLLL2Pz5s0YO3Ys5HI5duzYgdTUVAQEBAAASktL0bJlS2l7EydOlH4ODQ3FpEmTMHr0aISHh8PDwwO7d+9GRkYGQkJCAFT0EBwdHWusbciQIejYsaN0//PPP8fmzZuh1Wpx6dIlBAcHY9iwYdVet2PHDty+fRvLli0DUHHR2YyMjGoXyb148WKN69edOnUKrq6u0nuMjo7GokWLAACpqalYv349AEg9K2NELYetQkJCsHfvXjg5OWH69OmIi4tDaWkpzp49C29vbyQlJSE4OFi6/mF0dDSeeeYZ6fXjxo2DnZ2dVOf06dMhk8mgVCqlq/c7OTnB3t4eTz/9NIYNG4aRI0fqrScqKgoAEB4ejqlTp+LmzZvYsWMHfv31VyQnJwOouED1mTNnMGLECABAZGRkjdu6ceMGYmJioNFoYGtri+zsbFy5cgXt2rXTuy+g4nfg3tDylClTEB8fj6effhr79++v9Lt2584dXLp0CVqt1uB+7q8vIyMDc+fOxZUrV6DVaitd+DkgIABdu3YFUPH/xb3e3nfffYfly5dL7dq2bYsTJ07g5MmTCAsLkx6/evWq9HOHDh2Qnp6u93Om+sego0psbGykL0e5XC4du/D19UV5eTlOnTqFDRs2SCsvCCHw4osv4oUXXqhxe/b29tLPn3zyCY4ePYpdu3YhJCQE8fHxEELg0UcfRWJiotHa7t/W3r17kZCQgNTUVDg6OuLll1+WhpOqEkJg/fr1UKvVBrffokULlJWVVQpqANVWUze0unptVl5XKpWVhmezsrKgUCiqtQsNDcUrr7wCJycnLFq0CHv27MHatWsRFBQEmUxmtK77Py995HI5Dh48iNTUVHz33Xd44403kJ6eLq0UbYwQAosXL65xtW65XI4WLVrU+Lq5c+ciICAAmzZtgkwmg4uLi95/v/vV9PnqdDq0atWqxvCYOnWq3v1UrW/ixInYsGED+vfvj+PHj2PMmDHSc82bN6/0vu4/ple1JiEEvLy89IZZ1T8GyfR4jI5qLTIyEgsWLMClS5cQFBQEABgxYgTWrFmDa9euAQCKior0Tqw4ffo0+vTpg1deeQWPPfYY0tPT0b9/f/z88884ceIEgIoFJ+8dzzCkuLgYbdq0gaOjIwoLC5GQkCA95+joWGntuREjRuDjjz+GVqsFUDEZpKa16by9vXHmzJlqj/fs2RP5+fnSF9fq1aulHtHAgQOl4zLHjx+v1SSYMWPGYNOmTbhx4wauX7+OrVu3VvpSvUetVkOj0eDcuXPo1q0bQkNDsWDBAoSGhgIAgoKCcODAAWlG55o1a/SusxYSEoINGzYAAHJzc/Hjjz8CqFiCqbCwEIMHD8bChQvRokULvTNN161bBwD45ptv0LVrV9jb22PEiBFYvny5FB55eXnS8UtDiouL4e7uDplMhq1bt1bq8ejbFwDs3r0bFy9ehBAC8fHxCAkJQevWreHt7Y3Vq1dLr7+3fJCx/dyvpKRE6i1+/vnnRt8DAAwbNgwff/yxdP/q1avo1asXrl+/jj179gCoCL6jR49KbU6fPi0dZ6aGwaCjWouMjMS6deswfvx46bEhQ4bg2WefxcCBA+Hn54fBgwfr/aJ85ZVX4OPjA7VajYKCAkyaNAnt2rXDxo0bERMTA39/f6jVaqSmphqtZfjw4XBwcEDPnj0RERGBgQMHVqrz008/lSajvPHGG3BwcIC/vz98fHwwffp0aRLD/UaNGiV9Od2vefPm2LBhA2JiYuD
n54ddu3bho48+AlAxkSMtLQ3e3t7497//DR8fH6k3NH/+fCgUCuTk5OCRRx7BqFGjAAA9evTAs88+C7VaDbVajVmzZknDYvezsbFBQECA9KUYEhKCrKwsaZjX1dUVn376KcLCwuDr64tLly5h7ty5NX5ezz77LGQyGXr16oVp06ZhwIABACqCIDw8XJpMER4eXuO6cnK5HNnZ2fDz88O8efOkUImOjkb//v0RGBgIX19fjB07Vvqjx5DXXnsN//nPf6BWq7F3795Ky8/o2xcADBgwQJpU07JlS0yfPh0AsH79emlijkqlkibrGNpPVe+99x5CQkLQt29fODs7G30PQMVpIseOHYOPjw/8/f2xa9cu2NnZITExEfPnz4e/vz+8vb0r/SGWmpqK4cOH12r7VD+4TA/RHy5fvozw8HAcOHCgVkOQQMWxIJlMBjs7O5w5cwaDBg3C6dOnLW5oqiHX9NO3r7Vr12L//v1YuXJlg9RhCj/99BOWLVsm9VipYfAYHdEf2rdvj1dffRX5+flwc3Or1WsuXryIMWPGQKvVQgiBzz77zOJCjupPYWEh/vOf/5i7DKvDHh0REVk0HqMjIiKLxqAjIiKLxqAjIiKLxqAjIiKLxqAjIiKLxqAjIiKL9v8ByZ8Yy4M6gJYAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 319;\n", + " var nbb_unformatted_code = \"surv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (log10 words per appearance)\\\", ylabel=\\\"Tail probability\\\")\";\n", + " var nbb_formatted_code = \"surv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (log10 words per appearance)\\\", ylabel=\\\"Tail probability\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "surv.plot(marker=\".\", ms=1, lw=0.2, label=\"data\")\n", + "decorate(xlabel=\"Inverse rate (log10 words per appearance)\", ylabel=\"Tail probability\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The tail distribution has the sigmoid shape that is characteristic of normal distributions and $t$ distributions, although it is notably asymmetric.\n", + "\n", + "And here's what the tail probabilities look like on a log-y scale." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 320, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAboAAAD/CAYAAACHFRPuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAAua0lEQVR4nO3deVxU9f4/8NcsggsoCqLiQJi4DduACJmo5FqJG6H5NQ1LulhjYnUzu9a1255lX7tG5fdi6U+8LaaSaddSb2RaKi4kimuFMKCgiOAGyMzn9wc5ObINDHDODK/n48EDzpmZc94zzGNe8/mcz/kchRBCgIiIyEEppS6AiIioOTHoiIjIoTHoiIjIoTHoiIjIoTHoiIjIoTHoiIjIoTHoiIjIoamlLqA+HTt2hEajkboMIiKSMYPBgNLS0hpvk33QaTQaZGVlSV0GERHJmFarrfU2dl0SEZFDY9AREZFDk33XJRER1a+1TFusUCga/JgWD7pr167hiSeeQPv27dG3b1/Mnz+/pUsgInIYN27cQG5uLsrLy6UupUU4OzvD29sbbdq0sfoxNgddYmIi1q9fj3PnzqGystK8Pi0tDXq9HuXl5YiKisKKFSugUqmwYcMGjB8/Hg888ACmTp0KvV7foIKJiOhPubm5cHV1ha+vb6NaO/ZECIGioiLk5ubizjvvtPpxNh+jmzJlCvbv32+xzmQyIT4+HuvWrcPp06dRWlqKlJQUAFX/FB8fHwBA165dceHCBVtLICJqlYQQKC8vh7u7O5RKJRQKhUP/KJVKuLu7o7y8vEFdtTa36CIjI6utS09Ph5eXl3m45+zZs5GUlIS4uDh4e3sjNzcXgwYNwoULF+Dh4WFrCfUymQT+e6IQfTxdoLzlG8/tX35u/TZkEgJH80pg+uPFVCoUCOjZCSql5X2O5V9G/x6uOH72j9/nLmNAD1cczS+FAoC/V0ccO3sZ/l4doVQoqrZ7thT+Xh0BAJl5Jbj5/1IqFBjQwxXf/HIWPbo4Y0vGOYwN8sR3meexKLo/nFQq/JJ7Cb8WXoZCqcT9Qd2x4vtfYRKmW56rCSfOlaKsUoF5o/sg1KczvjpogKG4HBF+nfH1oXwEeXcCjALfnzqP3l074OKVG3B3aYOCS9dx6vxV+Hl2wJVyI/p0cwVMAicLr8CtfRsYK404nFeCLi5tcaenK+4L6oZ3t57Ane7tkJVXguIyYKCmLX7OLYN7G6DoRtUbrBKAbycVzpQY0TqOIlTxBZD25jipy6BWwNFbcreSzTE6g8EAb29v87KPjw9yc3MBADExMdDr9UhLS8Pdd99drdsyKSkJSUlJ5uXi4mKb68kwXMLLX2fh5Qn+CNR0AoBqH7i3fzk4nHcJf//qKG4Yq0KkjUqJN2ICEdizk/k+R/NL8dLXRzFz8B1Y8/MZzLzrDqzZU/U7edfvUAB4NNIXKXtysHi8FgE9OyHTUIKXN2fh79FVXwIWpR6x2MdYbTd8ui8HSgVwwwR8m1UAkwCMJoHJoT2xcEMmLl27AaUC2JFVgL3Ztb8+8/6dgSlhGqzdW/Xap+w9A6MA/nO00Hyffdkl1R6Xc6mi6rYzNZ98eeH6dZw8fx1b/9jOmeIK820/55YBqAo5oCrkACC7xFhrnY4qG4Dvwi0tus9+AL5luJIMpKWl4dVXX8X27dtrvc/LL7+Mv//9781eS7MEXV1Nyvbt2+OTTz6p9Xa9Xg+9Xm9eruskQGvpNG5YNk0HncYNSqV13wbu6euJD2aEQpj+bNGF+HS2ePwIF2d0+R8nBHl1wiDfLlW/e1X9HujbGQoBBGvcEN7L3bzvbq5t4eHqDJ3GDQDQxcXJYh+BXp0wwKsjfLq0xfoDeZgU7IWvD
p/FGxMD4eSkQhcXJ5w4WwqlQoHJQT2x7PuTMFVatuiy8ktw1QgsHNsfg3q5o293V+RdvIZIPw+sP2iATuMGE0z477FC9O3qggtXKtDV1Qlni6/hWOEV9Pd0RWl5Jfp37wgIgePnLsOtQxsYKytxyFACD5d2uNPTFRN0PfDmN8fg594emXnFKC4DBmna4cec6/BoA1y4AbQBcANArz9adH9WSs3hBBofrtkMSGphdh10N7snb8rJyWnwNF6pqalITU3F5cuXba5HqVQg1Kdzgx8TdkcXq7d7++9bH3vrvm+vpaZ9PBRxBwBgaJ9uAICoAd3Nt4X7uiPc1928/Ldx/vU+l7jBvcx/D+vXzfz37KF96n1sfcYE9LR5G47ms0NHsPDzM1KX0WDWBCTDkOqydu1a/OMf/4CrqyuGDRsGADhw4ADmzZuHa9euQQiBJUuWYMyYMXjqqadgNBqh0+ng4eGB7du348knn8SePXtQVlaG4OBgrFy5Es7OzjbXpRBNdPKFWq02j7o0Go3o06cPNm/eDK1Wi6lTp+K+++7DI4880uDtarVaTgFGVI+W7iK93cePajGib6/670hNSgiB48ePo3///lAoFDCaBM5frv80A5MQyMovhfaPsQP16erqbDE+oSbnzp1DaGgoDhw4gO7duyM2NhYlJSXYsGED2rVrhzZt2iAvLw/Dhg3Dr7/+CsAyNwCgqKgI7u5VX+TnzZsHf39/JCQk1Pmcb6orK2xu0SUkJGDLli0wGo3QaDSYOHEikpKSkJycjNjYWJSXl2P48OGYOXNmg7bblC06IkfX2JZWUwXkox9nAbD8kGHrT76y/hhf8NJ4fwTcMu7AFnv37kVkZCR69OgBAHj44YexfPlyXLlyBfHx8cjKyoJarUZubm6tAxG/+uorfPjhhygrK0NJSQlMpqY52GFz0K1YsaLG9SNGjLCpJTZp0iRMmjSpSY7REVHNrAmjxobh7Y9j8DU/lVKB7p3a1ns/T1dndHFxatC4hcZatGgRQkND8fnnn0OhUMDd3R1lZWXV7pednY2XXnoJBw4cQNeuXbF8+XL88ssvTVKDbKcAY4uOSB7qCqiGhODN+zLwpNeYcQv1iYiIwBNPPIFz586hW7du5nOnS0pK0LNnTygUCnz55Ze4ePGi+THt27fH1atX0aFDB5SWlqJdu3bo3Lkzrl27hjVr1iAoKKhJapNt0LFFRyR/t4eWNcF3630Yeo6je/fuWLJkCYYNGwZXV1cMHz4cxcXFeP755/Hwww9j6dKlGDp0qHnCEACYO3cuBg4cCI1Gg+3btyMqKgr9+/dH165dER4eXmPLrzGabDBKU7vZotuxY4fFCE4ish/WtvgYeI1T28AMR9aYwSiyvUzPpEmTsGrVKri6ukpdChE1Uvab45D95jgMqOd+vgu3SD5ylByXbIOOiBzHf/4IvPpabgw7ag6yPUbHwShEjulm2NUWahy0Qk1Nti06dl0SObb6Wnhs3VlPpkMtmkVjnqtsg46IWof6wm44A69WCoUCzs7OKCoqgslkghDCoX9MJhOKiorg7OzcoME3sh11eROnACNqPepqxbErs2a8wniVurJCtkHH0wuIWieGXePI9KO8ydXWkrPLoLuJLTqi1qm2wGPYUU3s8jw6Imrdags0DlKhhmLQEZFsMeyoKcg26FJTUzFr1iyeR0fUyjHsyFY8RkdEdqG2YJsZArzyII/btXY8RkdEdq+2lt2aQ0DyngMtXA3ZEwYdEdmN7DfHYXZE9fWvpp7Drl9zWr4gsgsMOiKyKy9OrnnqsBn/ypSgGrIHDDoisks1hR0HqFBNGHREZLcYdmQN2QYdTy8gImv85e7qH2MMO7oVTy8gIrvH6cKIpxcQkUPjSeVUFwYdETkEhh3VhkFHRA6jtrC7j2HXqjHoiMih1BR2xySog+SjxYOusLAQjz76KIYOHdrSuyaiVoKnHdCtrA66xMREaDQaqNVqi/VpaWnw9/eHn58f4
uPjYTQa69yOp6cnPv74Y7i7uzeuYiIiKzDs6Carg27KlCnYv3+/xTqTyYT4+HisW7cOp0+fRmlpKVJSUgAAmZmZiI6OtvhJT09v2uqJiOoQ1rb6OoZd66Ou/y5VIiMjq61LT0+Hl5cXtFotAGD27NlISkpCXFwcAgMDsXnz5qarlIiogb58aVyNwea7cAvPsWtFbDpGZzAY4O3tbV728fFBbm5unY8pLy/HnDlz8Msvv0Cv19uyeyKiejHQyOoWXU0aM6mKs7MzPvroo1pvT0pKQlJSknm5uLi4UbUREd2U/Wb1lh1bda2HTS06b29vixZcTk4ONBqNTQXp9XpkZWXh9ddfR3h4eLXBL0REjcHBKa2XTUEXFhYGg8Fgnl9s5cqViImJaZLCJk2ahFWrVsHV1bVJtkdEtGSab7V1DDvHZ3XQJSQkQKPRwGg0QqPRQK/XQ6VSITk5GbGxsejduzdcXFwwc+bMJimMVy8goqY2Vedf4/pvsk61cCXUknj1AiJqdWpqxfF4nX2zy6sXsEVHRM2Fx+taF9kGHY/REVFzqins/rqKYeeIZBt0REQt7cvjUldAzUG2QceuSyJqbuzCbB04GIWIWj0OTrF/djkYhYiopUR5VF/3r5/3V19Jdkm2QceuSyJqKav+Wr319tpXBRJUQs2BXZdERH9gF6b9YtclEZEVagq1VzZycIq9k23QseuSiKRw523LK/dKUgY1IXZdEhHdhl2Y9oddl0REDVBTqD2exC5Me8WgIyKqwV/utvx4/E9uLXck2WPQERHV4G8T7qu2jrOm2CfZBh0HoxCR1GrqwryPYWd3ZBt0vHoBEcnBu9Mtx2EeA5B9/pIktVDjyDboiIjkICZoAAIVluuilu6WphhqFAYdEVE9vn6jehdmNLsw7QaDjojICrcfrzsCYPPRk9IUQw3CoCMistK0AMvluWtOSVMINYhsg46jLolIbt6cMQ5tb1s36x12YcqdbIOOoy6JSI6O39aFmXYB2H7iN4mqIWvINuiIiOTqiWFOFsvxnxyTqBKyBoOOiKiBFtw/Gv1uW/fcGnZhyhWDjoioEb69rQvz86OAoahUomqoLgw6IqJGemq05RiCyLd/lKgSqguDjoiokRJHDoNObblu8Tp2YcpNiwfdli1bEB8fj4ceegirV69u6d0TETWp1FctuzBXH+BcmHJjddAlJiZCo9FArbb8+pKWlgZ/f3/4+fkhPj4eRqOxzu2MGzcOycnJWLt2LTZs2NC4qomIZGTJNF+LZc6FKS9WB92UKVOwf/9+i3Umkwnx8fFYt24dTp8+jdLSUqSkpAAAMjMzER0dbfGTnp5ufuwbb7yBxx57rImeBhGRdKbq/HG/j+W6VekZktRC1SmEEKIhD1Cr1aisrAQA7N27F88++yx27twJAPj222+RlJSETZs21bmNV155BX369MG0adPq3Z9Wq0VWVlZDSiQiksTtF2at6Xp21DzqygqbjtEZDAZ4e3ubl318fJCbW/f15v/1r3/hs88+Q1paGl566aVqtyclJUGr1Zp/iouLbSmRiKjFLIz2sFh+ZSMHpsiBuv671K6BjUEAwGOPPVZnl6Ver4derzcva7XaRtVGRNTS5kRG4M3Nf4bbyr3Ag4OL0Le7u4RVkU0tOm9vb4sWXE5ODjQajc1FAZzUmYjs065nh1osj1m2R6JK6Cabgi4sLAwGg8HcL7py5UrExMQ0SWGc1JmI7JHGvSOevKedxbrkPQckqoaABgRdQkICNBoNjEYjNBoN9Ho9VCoVkpOTERsbi969e8PFxQUzZ85sksLYoiMie/XM2BEWy6+mnsOVqxUSVUMNHnXZ0jjqkojsUUbuOUxKsmzJcRRm82m2UZfNiS06IrJnOu/uSBzZwWLdFxlHJaqmdWOLjoioGd1+bt1PC4bBqwvHHjQ1tuiIiCTy3fy7LJbvXrJTokpaL9kGHUddEpEj6NvdHa9O8bZYty87T6JqWifZBh0RkaOYM
TDIYnnqRxnSFNJKyTbo2HVJRI5kwxOhFstbj52WqJLWR7ZBx65LInIkoT494OKsMC/PWX0CF0quSVhR6yHboCMicjQ7n7nHYjnsje8lqqR1YdAREbWQLh3b4eNHLSeq33n6jETVtB6yDToeoyMiRzSiby/c0cXZvPxw8hF2YTYznjBORNTCrlytQMAr2yzWcXow29jlCeNERI7KpYNTtVGYx89ekKgax8egIyKSQKhPD7Rt8+fyve/tRWWlSbqCHJhsg47H6IjI0e18Jspi+Z2v90pTiIOTbdDxPDoicnSebh3wWUKwefmjvRdRUWGUsCLHJNugIyJqDe7qpbFYfu7fP0pUieNi0BERSWzT3DDz3xuPX4WhqFTCahwPg46ISGJBmm7wdP1zZErk2z/CZJL1mV92hUFHRCQD380bZrH8WQYnfW4qDDoiIhlwc22Lb+aFm5f/9sVJlJVVSliR45Bt0PH0AiJqbbReXS0+lOes4sCUpiDboOPpBUTUGqX9NfLPv7OvIfv8JemKcRCyDToiotbIx6MT3p8WaF6OWrqbozBtxKAjIpKZ+4O8MW+Mr3k58u0fcelymXQF2TkGHRGRzCiVCsyP0qJ7RyfzuuFv7pCwIvvGoCMikiGlUmFxykGJseryPtRwDDoiIpnq6OIMbXcX83Lwq9twsfS6hBXZpxYPulOnTmHOnDl45JFHsGjRopbePRGRXfniscFwb68CABgFMOj1//L8ugayOugSExOh0WigVqst1qelpcHf3x9+fn6Ij4+H0Vj3zNt9+vTBRx99hE8++QQnT55sXNVERK2ESwcn/LxwNDq0qfq4NgJI/HSftEXZGauDbsqUKdi/f7/FOpPJhPj4eKxbtw6nT59GaWkpUlJSAACZmZmIjo62+ElPTwcAbNu2DTExMRg0aFATPhUiIsfk5KTC7gX3mJe/O1HM43UNoBBCNGjmULVajcrKqmbz3r178eyzz2Lnzp0AgG+//RZJSUnYtGmTVduKjo7Gxo0b0aZNm1rvo9VqkZWV1ZASiYgc0hNr9uObowUAgC7tVdizcDScnFQSVyUPdWWFTcfoDAYDvL29zcs+Pj7Izc2t8zG7d+/GvHnz8PjjjyMkJKRayCUlJUGr1Zp/iouLbSmRiMhhvDtFB5c/ujAvXjPi9S2HJa7IPqjrv0vtGtgYBAAMGTIEQ4YMqfV2vV4PvV6P1NRUpKamYscOnjtCRAQAbduq8dNzIxD86nYIAP9vbz6eHq1FRxdnqUuTNZtadN7e3hYtuJycHGg0mjoeYT3OdUlEVF1HF2d8EjcQAGACEP3PNEnrsQc2BV1YWBgMBoO5X3TlypWIiYlpksJ49QIiopoN69cNXf445eDslUpcu3ZD4orkzeqgS0hIgEajgdFohEajgV6vh0qlQnJyMmJjY9G7d2+4uLhg5syZTVIYW3RERDVTKhWYG9UHAHDDBIx+7wdUVNR9aldr1uBRly3l1mN09Q1wISJqbSorTYj5cDcO51Vd2WD23d54cUKQxFVJp9lGXTYntuiIiGqnVivxxWOD8UcPJv6TeQ6VlSZpi5Ip2QYdERHVrW1bNUZquwMA8i/fwOo9v0lckTzJNug4GIWIqH5vPxCMdn98km9Iz5G2GJmSbdCx65KIqH5t26oRpKn6nHRxtunUaIcl26AjIiLr3Jxs/7eiMl7ZoAayDTp2XRIRWefmqQbnr97AX788JHE18iPboGPXJRGRdSL8PBDYs+qz8lBuCc+pu41sg46IiKyjVCrwt7EDoFIAeSXliH5/F7swbyHboGPXJRGR9SL8PLD2LxHw7uSEk4VXsGDDL1KXJBuyDTp2XRIRWU+pVOCuXh4Y6191Xt3Fq+U8gfwPsg06IiJqOF+PqsbBrl+LsTb9jMTVyAODjojIgUwL90Fk784AgG1HOC0YwKAjInIoarUS80b0g5NKgV2/XsRf1//S6sNOtkHHwShERI0T1qsL/j5Bi7ZqBVIP5ePtbSdgM
snyQjUtQrZBx8EoRESNo1QqMH3QHXhhvBbOKmDNT9nYd6ZI6rIkI9ugIyKixrsZdmP9u+HaDRO+zyqQuiTJMOiIiByUUqlAuK8HACDv0nXs+62oVXZhcqprIiIHNi3cB3mlZdhwIBffHy/ElEE+eOF+LdTq1tPOaT3PlIioFVKrlXh2TD8kjuoLpUKJ1T+dwecHcqUuq0XJtkWXmpqK1NRUjrokIrKRUqnAtEE+MEFgyTfHcSy/BJWVplbTqpPts+SoSyKipnNzcMr/RPhg7d5cfLq/9VyNXLZBR0RETUupVMCnS3sIAAeyL7aaE8kZdERErciDYT4Yq/XEpl/OYun2k61iFCaDjoioFVGrlUiaPhCTdV74bF8O9udclLqkZsegIyJqZdRqJcJ8u+DStRv4NvOcw7fqZDvqkoiIms+Ugd7IKb6ODQcN6NzBCU9E+UGpVEhdVrOQpEVnNBpx//33Y9myZVLsnoio1bt5ft38UX2xdu8ZrNz1u8O27KwOusTERGg0GqjVlo3AtLQ0+Pv7w8/PD/Hx8TAajfVu63//938xYcKEhldLRERNRqlU4MEwb8wd0Qcfpv2Kg7nFUpfULKwOuilTpmD//v0W60wmE+Lj47Fu3TqcPn0apaWlSElJAQBkZmYiOjra4ic9PR3p6elo27Yt+vfv37TPhIiIGkypVKB/N1cIYcK2o455oVarj9FFRkZWW5eeng4vLy9otVoAwOzZs5GUlIS4uDgEBgZi8+bN1R7z2muvobCwEGlpaTh//jymTJmCnj172vAUiIjIFiE+nREzUIPkXdnw9XDB/4T7SF1Sk7JpMIrBYIC3t7d52cfHB7m5dc+htmjRIgBVXZ4ZGRnVQi4pKQlJSUnm5eJix2xKExHJhVKpwPP3DkAblRJ3dG4Hk0k41MAUmwajCNH4A5dRUVGYP39+tfV6vR5ZWVnmn86dO9tQIRERWUOtVmJkf0/oPz2Egw52bp1NLTpvb2+LFlxOTg40Go3NRQGc1JmIqKUpFQo4q1U4nFeC6xUmDPHzcIiWnU0turCwMBgMBmRlZQEAVq5ciZiYmCYpjJM6ExG1rBCfzvhgRigCvTph/ucZDjMK0+qgS0hIgEajgdFohEajgV6vh0qlQnJyMmJjY9G7d2+4uLhg5syZTVJYamoqZs2axRYdEVELUSoVCPXpDKVCASEETp277BDn1imELQfaWoBWqzW3GImIqPmZTALLtp9EakY+lk3TIdRH/mMl6soK2U4BxmN0RETSUCoVmBvlh0qTQJBXJ6nLsZlsJ3XmMToiIukcOVeKjYfycDi/ROpSbMYWHRERVRPk1QkxIT3ZomtObNEREUnncH4JNjhIi062QUdERNLRadyQOLIPhEnY/chL2QYdTy8gIpKOUqmAUqHAM+t+QYbhktTl2ISnFxARUY32ZRfheH4ppoffAbVatu0iAHVnhbwrJyIiyagVCnz4w2/47tg5qUuxiWxHXRIRkbR03p2R9FAo3Ds4IT27CCqFAjrvznY3/6VsW3Q8RkdEJK2bU4Ld4d4BRhPw+NqDOJhjf/Nf8hgdERHVy2QSOJhTjKsVlYjo5Y62bVRSl2TBLqcAIyIi+VAqFQjz7QIhBHb/egEQwN297eMyPrLtuiQiIvlRKBRo76TGs18etpvTDhh0RETUIDqNG+aP6gudxk3qUqwi265LznVJRCRPSqUC/bq72kW3JSDjFh3nuiQiki+jyYSDOcV2MT2YbIOOiIjk61TBFTz9eYZdHKeTbdclERHJV3RQDziplXZxnI4tOiIiajCXtm1wh3sHuzhOx6AjIqJGMQlhF8fpZBt0nAKMiEjeym8YoV97UPbH6TgFGBERNYrJJPBd1jn07eaKO7u6SFoLL9NDRERNTqlU4N6AHrhabsSlaxVSl1MrBh0REdkkUNMJWfmlMMr0WB2DjoiIbBbeqwv2/l4kdRk1YtAREZHN1ColtD064kheidSlVMOgIyKiJuHW3gntn
VTYcjhfVqcctHjQpaWlYciQIZgzZw7WrVvX0rsnIqJmdOn6Dbz+zTH858hZqUsxszroEhMTodFooFZbzhqWlpYGf39/+Pn5IT4+Hkajsc7tKBQKuLi44Nq1a7jzzjsbVzUREcmSTuOG5dND0aebC7IvXJW6HAANOI9u165d8PPzg0ajQWVlJQDAZDKhb9++2LRpE7RaLaZOnYpx48YhLi4OmZmZeP755y22sXjxYgwcOBBKpRJXr17F1KlTsWXLljr3y/PoiIjs0+nCK1ApFejl0aHZ91VXVlg9qXNkZGS1denp6fDy8oJWqwUAzJ49G0lJSYiLi0NgYCA2b95c6/Y6dOgAmZ+rTkRENvDzdMHpwiv4/cLVFgm72th09QKDwQBvb2/zso+PD3Jzc+t8zIYNG7B161ZcuXIFcXFx1W5PSkpCUlKSebm4uNiWEomISEJyCDubgq4xLbKYmBjExMTUerter4derzcv32wtEhGRfZI67Gwadent7W3RgsvJyYFGo7G5KICTOhMRORI/TxcYTQK/nb/S4vu2KejCwsJgMBjMBwBXrlxZZ2utISZNmoRVq1bB1dW1SbZHRETS8vN0gQBaPOysDrqEhARoNBoYjUZoNBro9XqoVCokJycjNjYWvXv3houLC2bOnNkkhbFFR0TkeHp3bfmw42V6iIioxf16/goUQJNd3scuL9PDFh0RkePq3dUFRiHw868Xmn26MNkGHY/RERE5tstllXh+Q2azX6HcptMLmlNqaipSU1PZoiMiclA6jRvefVAHncatWffDY3RERGT37PIYHRERUVNg1yURETk0dl0SEZHdY9clERG1Wgw6IiJyaDxGR0REDk32x+g6duxY4xURiouL0blzZwkqahx7qxdgzS2FNbcM1twypKrZYDCgtLS0xttkH3S1sbdBKvZWL8CaWwprbhmsuWXIsWYeoyMiIofGoCMiIodmt0Gn1+ulLqFB7K1egDW3FNbcMlhzy5BjzXZ7jI6IiMgadtuiIyIisoZdBV1iYiI0Gg3Uatme/ldNbm4uRo4ciQEDBsDf3x/PP/+81CVZZcyYMdDpdAgMDERsbGytw3blRq/X29X7w9fXF/7+/tDpdNDpdMjMzJS6pDpdvXoVcXFx6NevH/r3748VK1ZIXVKdCgsLza+tTqdD9+7dMXnyZKnLqldKSgqCgoKg0+kwdOhQnDhxQuqS6vXRRx8hICAA/v7+eOKJJ2A0GqUu6U/Cjvz444/i7NmzQqVSSV2K1fLz80V6eroQQojy8nIRGRkpUlNTJa6qfpcuXTL/nZiYKBYvXixdMVbauXOnePjhh+3q/XHHHXeI3NxcqcuwWkJCgnjrrbeEEEKYTCZRUFAgcUUNM3LkSLF27Vqpy6jT1atXRZcuXcT58+eFEEJ8+OGHIjY2VuKq6nbkyBHRu3dv8+fGvHnzxOrVqyWu6k921aKLjIxE9+7dpS6jQXr06IGwsDAAgJOTE0JCQpCTkyNxVfXr1KkTAMBkMqGsrAwKhULiiupWXl6OhQsX4p133pG6FId1+fJlbNq0CU8//TQAQKFQwNPTU+KqrJefn4/9+/dj0qRJUpdSJ5PJBCEErly5AgAoKSlBjx49JK6qbllZWYiIiDB/bowdOxaff/65xFX9ya6Czt5dvHgRqampGD16tNSlWGXy5Mnw9PTEiRMn8Mwzz0hdTp1efvllzJ49G127dpW6lAYbP348dDodFi1ahBs3bkhdTq1+++03dOvWDXPnzkVoaCgmT56MM2fOSF2W1T799FNMmjQJ7du3l7qUOrm4uOD9999HQEAAevbsidWrV+OVV16Ruqw6BQUFYdeuXcjPz4fRaMS6deuQm5srdVlmDLoWUlFRgdjYWCQmJqJ///5Sl2OVjRs3Ij8/HxqNBl9++aXU5dTq8OHD2Lt3Lx555BGpS2mwH3/8EYcOHcLu3btx4sQJWbdIKysrkZGRgdjYWBw8eBDjx4/Ho48+KnVZVktJS
cGMGTOkLqNeN27cwAcffID09HTk5eUhNjYWzz33nNRl1alfv3547bXXMGHCBERGRsLb2xsqlUrqsswYdC3AaDRi+vTp0Ol0sm8Z3c7JyQnTpk3Dxo0bpS6lVrt370ZWVhZ69eoFX19fGI1G+Pr62sUAGm9vbwBAhw4dEB8fj59++kniimqn0Wjg7u6OUaNGAQCmTZuGAwcOSFyVdbKysnD+/HmMGDFC6lLqlZGRASEEBgwYAKDqdZbz++KmGTNmYP/+/fj5558REhIiqy/0DLoW8Je//AWurq5YunSp1KVY5fLlyzh79iyAquMFmzZtgr+/v8RV1e7xxx9Hfn4+srOzkZ2dDZVKhezsbHTs2FHq0up09epVcxgbjUasX78eQUFBEldVu27dusHf3x8HDx4EAGzbtk3W74tbrVmzBtOnT4dSKf+PPI1GgxMnTiAvLw9A1eus1Wolrqp+BQUFAIDS0lK89dZbePLJJyWu6E92dcJ4QkICtmzZgry8PPTs2RMTJ05EUlKS1GXVaffu3YiMjERAQIC5Kf/oo49i3rx5EldWu/z8fEycOBHl5eUwmUyIiIjA8uXLZX9s4ya1Wo3Kykqpy6jXb7/9hpiYGJhMJhiNRgwePBj//Oc/Zf06Z2VlYfbs2bh69Src3NywYsUKc8tDroQQ8PX1xddffy3rLxK3Sk5Oxrvvvos2bdqga9eu+Pjjj+Hj4yN1WXW65557UFBQACEEnn32WVl1a9tV0BERETWU/NvxRERENmDQERGRQ2PQERGRQ2PQERGRQ2PQERGRQ2PQERGRQ2PQUaPZ0+VwrJGamorDhw836rFjxowxn+DbnBztNbcHJpMJQ4cOxaVLl6QuhRqJQUey1pQnfte3rcYG3Y4dO9CzZ0/07NmzsaXVSOqT3qXef31aqj6lUolZs2bhvffea5H9UdNj0FGTUKvVePnll6HT6RAUFISTJ09CCIFevXpZtHTGjRuH7777DgCwfPlyhIeHIzg4GPHx8eaZ+319fbFw4UKEhYVh+fLl2Lhxo/kilEFBQeYZ89PS0hAZGYmBAwdizJgxNc6WvmrVKkRHR2Ps2LEICAgAAMTGxiIsLAwBAQF46qmnAADff/89Nm3ahEWLFkGn0+Gnn37C9evXMWfOHISHhyMwMBDvv/9+jc999erViI2NBVAVljcnl05NTYVarTZP8+Xn54eSkhJUVFTg8ccfR0BAAAICApCcnGzxOr7wwgsICQnBhg0bkJGRgZCQEAQFBWHx4sXm+xUUFGDEiBHQ6XTw9/fHBx98UONzHz9+PEaNGoV+/fohLi4OFRUVAIAzZ84gOjoaYWFhCAsLww8//FDr63Wr119/HYMGDUJwcDCio6NRVFRU776ioqLw1FNPITQ0FH369MFnn31m3t66desQERGBkJAQPPDAAygpKal3P9b8PwFg1qxZmDt3LiIjI9GrVy+LWZT++9//mt97gwYNwsWLFwHU/p6MiYnBmjVravz/kx2Q7Ep4ZPduvcApAPHpp58KIYR4++23RXx8vBBCiOeee0688847QgghLly4IDQajaisrBQ7duwQM2bMEEajUQghhF6vFx988IEQoupipC+++KJ524GBgSI/P18IIcS1a9fE9evXRVFRkRg8eLD5Qo9ffPGFmDp1arUaP/nkE+Hh4SHOnTtnXnfhwgUhhBBGo1FMnDhRbN26VQghRFxcnFizZo35fi+++KJYsWKFEEKIsrIyMXDgQHH06NFq+/D19TVv/8KFC6J3795CCCHmz58vwsPDxZYtW8Tvv/8uBg4cKIQQYtmyZeKBBx4QRqNRFBUViV69eokjR46YX8eVK1eatx0UFGSub9myZebXfOnSpeL111833+/ixYs1Pnc3NzdhMBiEyWQSMTExYvny5UIIIUaMGGHe55kzZ0SvXr2EyWSq8fW61c3X7mYNCxcurHdfw4cPF1OnThUmk0nk5OSIbt26iYKCAnH8+HExevRoUVZWJoQQY
smSJWLBggX17qch/89x48aJyspKkZeXJzp16iQqKirE+fPnhZeXlzh27JgQQojS0lJRXl5e53tSCCH8/PyEwWCo8XUheWOHPzUJhUKBBx54AAAwaNAgbN26FQDw0EMPIS4uDs888wy++OILxMTEQKVS4ZtvvsHOnTsRGhoKACgrK0O7du3M23vooYfMf0dFRWHGjBmYNGkSJk6cCB8fH2zfvh0nTpzA8OHDAVQdR3F1da2xtlGjRqFbt27m5f/7v//DF198AaPRiMLCQkRGRmLs2LHVHvfNN9/g+vXr5tZSaWkpTpw4UW2C3bNnz5qvg+fu7o4OHTogNzcXP//8M55//nn88MMPKCwsNNealpaG+Ph4KJVKdOnSBRMmTMAPP/xgniB5+vTpAKouuHn27FlzbbNmzTJf/SIiIgKPPPIILl++jHvvvRfDhg2r9bnf7FJ9+OGHkZKSglmzZmHXrl0Wr3FFRQUKCwtrfL1u9dNPP+GNN97A5cuXcf36dYsZ6mva19y5c83LCoUC3t7eGDx4MNLT0/H7778jMzMTERERAKouTxMYGGjVfqz9f06ePBkqlQpeXl7o3LkzCgoKkJGRgbvuusu8zZvvm/rek56ensjPz2/yLmpqfgw6ahJKpRJt2rQBAKhUKvPxk8DAQFRWVuL48eP497//bb6CgxACTz31FObPn1/j9jp06GD++5///CcOHTqEbdu2Yfjw4UhJSYEQAkOHDkVqamq9td26rR9++AHr16/Hzp074erqimeeeQZlZWU1Pk4IgbVr10Kn09W5/bZt26K8vNz8oTh8+HB89dVXcHZ2xujRo7F06VIUFBQgJiYGAKpdrf3WZZVKhbZt29b7nIYMGYLdu3dj69ateO211/D555/XOMF5TVeGN5lMaN++PTIyMmrc9q2v163Ky8sxa9Ys7Nu3D71798bXX39tcdyqrqvQ13SbEAIPPvggli1b1qD9NOT/6ezsbP775vtS1DK9b33vyduDj+wHj9FRs5s+fTreeOMNFBYWIjw8HABw33334ZNPPjGPZCsuLsbvv/9e4+NPnjyJkJAQLFiwAKNHj0ZGRgYGDx6Mffv24ciRIwCqWgOZmZn11lJSUgI3Nze4urqiqKgI69evN9/m6upqcQ27++67D++99x6MRiMA4NSpUzVe487f3x+nTp0yLw8fPhxLlizB0KFDzR/KO3bsMLe6oqKisHr1agghUFxcjK+//trc2rtVp06d4OXlhW3btgGAxTGi7OxsuLm5YebMmVi8eDH27dtX4/Pdvn07zp49CyEEUlJSMHz4cHTs2BH+/v74+OOPzfe7eemdupSVlcFkMsHT0xNGoxErV66sd183rVmzBkII5OXlYc+ePQgPD8eoUaOwceNGGAwGAMC1a9dw/Pjxevdzq7r+n7UZPHgw9uzZg+PHjwOouixVRUVFne9JIQRyc3PRp0+ferdP8sOgo2Y3ffp0rFmzBtOmTTOvGzVqFObMmYNhw4YhKCgII0eONH/g3W7BggUICAiATqdDQUEBZsyYAQ8PD3z66aeIj49HcHAwdDoddu7cWW8t9957L1xcXNCvXz/ExsZadPlNnz4d77//vnkwygsvvAAXFxcEBwcjICAAjz32mHmAxa0mTJiAHTt2mJeHDRsGg8GAqKgoAFWtLw8PD7i5uQEA5syZA3d3dwQGBmLo0KFYuHBhrdd1W7VqFRYsWICgoCBcuHDBvP77779HSEgIQkJC8OSTT+Ktt96q8fFDhgzBzJkz0b9/f7Rr1w6PPfYYAGDt2rXYuHEjgoODodVqax1oc6tOnTrh6aefRlBQEO666y707dvXqn0BQPfu3TFw4EDcc889ePfdd9G1a1cMGDAA7777LiZMmIDg4GDcddddOHr0aL37uVVd/8/aeHh4YM2aNZgxYwaCg4MxatQoXLlypc73ZHp6OiIiIixaiGQ/eJkeIhudP38eEydOxO7du+vsvmtpq1atwq5duyxGdUqxr
6ioKLz66quIjIxs9jqaS0JCAqZOnYqRI0dKXQo1Alt0RDbq2rUrnnvuOZw7d07qUqgZmEwmhIaGMuTsGFt0RETk0NiiIyIih8agIyIih8agIyIih8agIyIih8agIyIih8agIyIih/b/ATkQTAUHkXHgAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 320;\n", + " var nbb_unformatted_code = \"surv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (words per appearance)\\\", yscale=\\\"log\\\")\";\n", + " var nbb_formatted_code = \"surv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (words per appearance)\\\", yscale=\\\"log\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "surv.plot(marker=\".\", ms=1, lw=0.2, label=\"data\")\n", + "decorate(xlabel=\"Inverse rate (words per appearance)\", yscale=\"log\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If this distribution were normal, we would expect this curve to drop off with increasing slope.\n", + "But for the words with the lowest frequencies -- that is, the highest inverse rates -- it is almost a straight line.\n", + "And that suggests that a $t$ distribution might be a good model for this data." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Fitting a Model\n", + "\n", + "To estimate the frequency of rare words, we will need to model the tail behavior of this distribution and extrapolate it beyond the data.\n", + "So let's fit a $t$ distribution and see how it looks.\n", + "I'll use code from Chapter 8 of *Probably Overthinking It*, which is all about these long-tailed distributions.\n", + "\n", + "The following function makes a `Surv` object that represents a $t$ distribution with the given parameters." + ] + }, + { + "cell_type": "code", + "execution_count": 291, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 291;\n", + " var nbb_unformatted_code = \"from scipy.stats import t as t_dist\\n\\ndef truncated_t_sf(qs, df, mu, sigma):\\n ps = t_dist.sf(qs, df, mu, sigma)\\n surv_model = Surv(ps / ps[0], qs)\\n return surv_model\";\n", + " var nbb_formatted_code = \"from scipy.stats import t as t_dist\\n\\n\\ndef truncated_t_sf(qs, df, mu, sigma):\\n ps = t_dist.sf(qs, df, mu, sigma)\\n surv_model = Surv(ps / ps[0], qs)\\n return surv_model\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from scipy.stats import t as t_dist\n", + "\n", + "\n", + "def truncated_t_sf(qs, df, mu, sigma):\n", + " ps = t_dist.sf(qs, df, mu, sigma)\n", + " surv_model = Surv(ps / ps[0], qs)\n", + " return surv_model" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "If we are given the `df` parameter, we can use the 
following function to find the values of `mu` and `sigma` that best fit the data, focusing on the central part of the distribution." + ] + }, + { + "cell_type": "code", + "execution_count": 292, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 292;\n", + " var nbb_unformatted_code = \"from scipy.optimize import least_squares\\n\\ndef fit_truncated_t(df, surv):\\n \\\"\\\"\\\"Given df, find the best values of mu and sigma.\\\"\\\"\\\"\\n low, high = surv.qs.min(), surv.qs.max()\\n qs_model = np.linspace(low, high, 1000)\\n ps = np.linspace(0.01, 0.8, 20)\\n qs = surv.inverse(ps)\\n\\n def error_func_t(params, df, surv):\\n # print(params)\\n mu, sigma = params\\n surv_model = truncated_t_sf(qs_model, df, mu, sigma)\\n\\n error = surv(qs) - surv_model(qs)\\n return error\\n\\n pmf = surv.make_pmf()\\n pmf.normalize()\\n params = pmf.mean(), pmf.std()\\n res = least_squares(error_func_t, x0=params, args=(df, surv), xtol=1e-3)\\n assert res.success\\n return res.x\";\n", + " var nbb_formatted_code = \"from scipy.optimize import least_squares\\n\\n\\ndef fit_truncated_t(df, surv):\\n \\\"\\\"\\\"Given df, find the best values of mu and sigma.\\\"\\\"\\\"\\n low, high = surv.qs.min(), surv.qs.max()\\n qs_model = np.linspace(low, high, 1000)\\n ps = np.linspace(0.01, 0.8, 20)\\n qs = surv.inverse(ps)\\n\\n def error_func_t(params, df, surv):\\n # print(params)\\n mu, sigma = params\\n surv_model = truncated_t_sf(qs_model, df, mu, sigma)\\n\\n error = surv(qs) - surv_model(qs)\\n return error\\n\\n pmf = surv.make_pmf()\\n pmf.normalize()\\n params = pmf.mean(), pmf.std()\\n res = least_squares(error_func_t, x0=params, args=(df, surv), xtol=1e-3)\\n assert res.success\\n return res.x\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if 
(nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from scipy.optimize import least_squares\n", + "\n", + "\n", + "def fit_truncated_t(df, surv):\n", + " \"\"\"Given df, find the best values of mu and sigma.\"\"\"\n", + " low, high = surv.qs.min(), surv.qs.max()\n", + " qs_model = np.linspace(low, high, 1000)\n", + " ps = np.linspace(0.01, 0.8, 20)\n", + " qs = surv.inverse(ps)\n", + "\n", + " def error_func_t(params, df, surv):\n", + " mu, sigma = params\n", + " surv_model = truncated_t_sf(qs_model, df, mu, sigma)\n", + "\n", + " error = surv(qs) - surv_model(qs)\n", + " return error\n", + "\n", + " pmf = surv.make_pmf()\n", + " pmf.normalize()\n", + " params = pmf.mean(), pmf.std()\n", + " res = least_squares(error_func_t, x0=params, args=(df, surv), xtol=1e-3)\n", + " assert res.success\n", + " return res.x" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "But since we are not given `df`, we can use the following function to search for the value that best fits the tail of the distribution." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 293, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 293;\n", + " var nbb_unformatted_code = \"from scipy.optimize import minimize\\n\\ndef minimize_df(df0, surv, bounds=[(1, 1e3)], ps=None):\\n low, high = surv.qs.min(), surv.qs.max()\\n qs_model = np.linspace(low, high * 1.2, 2000)\\n\\n if ps is None:\\n t = surv.ps[0], surv.ps[-5]\\n low, high = np.log10(t)\\n ps = np.logspace(low, high, 30, endpoint=False)\\n\\n qs = surv.inverse(ps)\\n\\n def error_func_tail(params):\\n (df,) = params\\n print(df)\\n mu, sigma = fit_truncated_t(df, surv)\\n surv_model = truncated_t_sf(qs_model, df, mu, sigma)\\n\\n errors = np.log10(surv(qs)) - np.log10(surv_model(qs))\\n return np.sum(errors ** 2)\\n\\n params = (df0,)\\n res = minimize(error_func_tail, x0=params, bounds=bounds, tol=1e-3, method=\\\"Powell\\\")\\n assert res.success\\n return res.x\";\n", + " var nbb_formatted_code = \"from scipy.optimize import minimize\\n\\n\\ndef minimize_df(df0, surv, bounds=[(1, 1e3)], ps=None):\\n low, high = surv.qs.min(), surv.qs.max()\\n qs_model = np.linspace(low, high * 1.2, 2000)\\n\\n if ps is None:\\n t = surv.ps[0], surv.ps[-5]\\n low, high = np.log10(t)\\n ps = np.logspace(low, high, 30, endpoint=False)\\n\\n qs = surv.inverse(ps)\\n\\n def error_func_tail(params):\\n (df,) = params\\n print(df)\\n mu, sigma = fit_truncated_t(df, surv)\\n surv_model = truncated_t_sf(qs_model, df, mu, sigma)\\n\\n errors = np.log10(surv(qs)) - np.log10(surv_model(qs))\\n return np.sum(errors**2)\\n\\n params = (df0,)\\n res = minimize(error_func_tail, x0=params, bounds=bounds, tol=1e-3, method=\\\"Powell\\\")\\n assert res.success\\n return res.x\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if 
(nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from scipy.optimize import minimize\n", + "\n", + "\n", + "def minimize_df(df0, surv, bounds=[(1, 1e3)], ps=None):\n", + " low, high = surv.qs.min(), surv.qs.max()\n", + " qs_model = np.linspace(low, high * 1.2, 2000)\n", + "\n", + " if ps is None:\n", + " t = surv.ps[0], surv.ps[-5]\n", + " low, high = np.log10(t)\n", + " ps = np.logspace(low, high, 30, endpoint=False)\n", + "\n", + " qs = surv.inverse(ps)\n", + "\n", + " def error_func_tail(params):\n", + " (df,) = params\n", + " print(df)\n", + " mu, sigma = fit_truncated_t(df, surv)\n", + " surv_model = truncated_t_sf(qs_model, df, mu, sigma)\n", + "\n", + " errors = np.log10(surv(qs)) - np.log10(surv_model(qs))\n", + " return np.sum(errors**2)\n", + "\n", + " params = (df0,)\n", + " res = minimize(error_func_tail, x0=params, bounds=bounds, tol=1e-3, method=\"Powell\")\n", + " assert res.success\n", + " return res.x" + ] + }, + { + "cell_type": "code", + "execution_count": 307, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "25.0\n", + "382.58404523885497\n", + "618.4159547611448\n", + "236.8319095222899\n", + "146.75213571656514\n", + "91.07977380572478\n", + "56.67236191084039\n", + "35.407411894884405\n", + "22.26495001595599\n", + "14.142461878928419\n", + "26.396431469641538\n", + "22.378315526830374\n", + "23.37711806078531\n", + "24.53039316028008\n", + "23.209065736046366\n", + "23.229227282976307\n", + "23.225700629153852\n", + "23.225367269503405\n", + "23.226033988804296\n", + "21.451401258307705\n", + "382.58404523885497\n", + "618.4159547611448\n", + "236.83190952228992\n", + "146.75213571656516\n", + "91.07977380572477\n", + "56.67236191084038\n", + 
"35.407411894884405\n", + "22.26495001595599\n", + "14.142461878928419\n", + "26.396431469641538\n", + "22.378315526830374\n", + "23.37711806078531\n", + "24.53039316028008\n", + "23.209065736046366\n", + "23.229227282976307\n", + "23.225700629153852\n", + "23.22536729582052\n", + "23.22503396248224\n" + ] + }, + { + "data": { + "text/plain": [ + "array([23.2253673])" + ] + }, + "execution_count": 307, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 307;\n", + " var nbb_unformatted_code = \"df = minimize_df(25, surv)\\ndf\";\n", + " var nbb_formatted_code = \"df = minimize_df(25, surv)\\ndf\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = minimize_df(25, surv)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 308, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(array([23.2253673]), 6.431987258334933, 0.4916380552135185)" + ] + }, + "execution_count": 308, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 308;\n", + " var nbb_unformatted_code = \"mu, sigma = fit_truncated_t(df, surv)\\ndf, mu, sigma\";\n", + " var nbb_formatted_code = \"mu, sigma = fit_truncated_t(df, surv)\\ndf, mu, sigma\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if 
(nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "mu, sigma = fit_truncated_t(df, surv)\n", + "df, mu, sigma" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Here's the `t` distribution that best fits the data." + ] + }, + { + "cell_type": "code", + "execution_count": 389, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 389;\n", + " var nbb_unformatted_code = \"low, high = surv.qs.min(), surv.qs.max()\\nqs = np.linspace(low, 1.1 * high, 2000)\\nsurv_model = truncated_t_sf(qs, df, mu, sigma)\";\n", + " var nbb_formatted_code = \"low, high = surv.qs.min(), surv.qs.max()\\nqs = np.linspace(low, 1.1 * high, 2000)\\nsurv_model = truncated_t_sf(qs, df, mu, sigma)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "low, high = surv.qs.min(), surv.qs.max()\n", + "qs = np.linspace(low, 1.1 * high, 2000)\n", + "surv_model = truncated_t_sf(qs, df, mu, sigma)" + ] + }, + { + "cell_type": "code", + "execution_count": 390, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAboAAAD/CAYAAACHFRPuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAA1WElEQVR4nO3de1xUdf4/8NfMcL8JchHkpoCIw21Qoq+rCeslczEvRK5aiiZpLaVW+y3LctuytLTdytC1vO2q9dXClE2z1MRrvySTNPGCGsKgeAEEFLnNfH5/kJMjDIPIXHk9Hw8ecc6cOec9J+TF53M+53MkQggBIiIiKyU1dQFERESGxKAjIiKrxqAjIiKrxqAjIiKrxqAjIiKrxqAjIiKrxqAjIiKrZmPqAtrDzc0NAQEBpi6DiIjMhFKpRFVVVYuvWWTQBQQEID8/39RlEBGRmZDL5TpfY9clERFZNQYdERFZNYvsuiQi6kw4JfHvJBLJXb/HoEE3a9YsZGVlobS0FI2NjS1uk5OTg4yMDNTV1SEpKQnLly+HTCYzZFlERBahoaEBxcXFqKurM3UpZsPe3h6BgYGwtbVt83skhnx6wf79+xEWFoaAgIAWg06tViM8PBzZ2dmQy+UYN24ckpOTkZaW1up+5XI5B6MQkdU7d+4cXF1d4enp2a6WjLURQqCsrAzV1dUICQnReq21XDDoNbqBAwfC19dX5+u5ubno3r27ZrTMtGnTkJWVZciSiIgsghACdXV18PT0hFQqhUQi6fRfUqkUnp6eqKuru6vuXJNeo1MqlQgMDNQsBwUFobi42DjHLinB9mMXENLVDtLb/lKSoOn7O/94UqnV2Pfrdfi52UAmlSLM0x6y3zZSQ+BseT1CPGzxa0UDQjzsUFjRgJ6edjhbVgdAgjBPO5wrr0cvT3sAwNnyOoR52kMtBHafrYa/my1kUilCPW2x+0wVLl5vhJejFAUVjRgW5orIbk5QqdX4LK8cPi628HOTYfeZ6wjztEVBWQNC3aX4+nQNnB1tMDnOHQ62tqhXqbDjdBU8HGWQSiTwcgS+O3cD3bvYo7enDXaduQ4IwMZGhkE97PHt2VoMC3XA9tM18LAHrtYAUing62aHh+XO+OT/VUAIwBaAsrbpvNgDuL1TxQFArZ5zv2R0V0R4ebX3f10zHfWXbkv/qO78XiqVQiqVwsbGBjKZTPNlY2MDGxte8qaOx5acNrO7RqdPWxM5MzMTmZmZmuWKiop7PvbJyzex4sdyZPT3Rvhv4XOrmltl3V7d3sLrWHPkGhxsJXCwkeLZ/t4I++19Z8rq8a9DZUgOd8XW09X4U7grtp2qxohwV2w5UQVIgNERbth2uhrT7/OEAPBJbhmevM8TJZX1+M+RCjjYSGBvI8H9gU7YXnBdq9YfS2oxq78X8i7exDdnml6TSQCVAPYWNcXKvqLfNq5txJu7r8LRVoK6RoEGdfPPfq6yFvuLbl+jwpm8GgDA6t/+e7Hm91dLb9Yj71J9i+fxzisH+kIOAJ7dUg6gvA1btm56H2BUv+B2vVfXz54QAmq1GkIIzVdbSSQS2NnZwdbWFnZ2drC3t4ejoyMcHR1hb2/PX1hEv0lKSsL8+fMxcOBAndu8/vrrsLGxwauvvnrPxzNp0AUGBmq14IqKilqc8SQjIwMZGRma5dZuDGyrwbGhWOrpCUWAO6RS/b+AEqLV6BlYjDBvZ9hIpYgL8tC8b4BaIDbiGmK6d8HIC5WI6d4FD//235El1yARQGyAO0ZdqIQiwB0A0E9+DYoAd6jVAr16/L7f6O5d8NnhIijLbsDXzQFHSyoxIT4YCaGeaGxU472dp+DfxRE9fZzwxY9KxHR3w88llYjydcH6H87DzckRLw7vDQd7G9SrVNhwqAjeznaQSqTwcbPDlrwSBHu6QBHghi+PKKEWgI2NDUZG+uDzIxfwqKI7/u9wMbyd7XCxuhZSiRSBXV0w+Q9BePur41ALwF4K/HqtESoALhL
g+m1Z4Aag5bkJOt7HJ4CPT5zX+botgO9f/iO8uji1+xi3B96tEFSpVFpfjY2NUKlUqK+vR319PRoaGlBZWQmVSqXZj1QqhbOzM1xcXODq6goXFxcGH5GRGHQwyi02NjYtDkZRqVTo1asXvvrqK81glBEjRmDq1Kmt7o+DUSzTtepaDHhrF26YupDfHJqTBB93Z4Ptv6GhATdv3kRtbS1qampQXV2N+vqmlrFMJoO7uzu6du0KV1dXhh41I4TAyZMnERERYfKfDxsbG7zyyivYsmULZDIZVq9ejddeew0nTpzAqFGj8N577wFoGoA4e/Zs1NfXIzAwECtXroSvry/Ky8sxadIk/Prrr5DL5VAqlVi8eDEGDhyIo0ePYvbs2aisrISjoyOWLVuG6OhonS06XeeltVwwaItuxowZ2Lp1K1QqFQICAjB69GhMnToV8+bNw7Zt2yCTybBixQqkpqairq4OiYmJmDRpkiFLIhNyd3XA8YXJ97yfg+eKMfHjo/e8n4SFOc3WnXx9OBwcOuafha2tLWxtbeHm5qZZV19fj6qqKly7dg3l5eUoKyuDvb09vL294eXlxVtrSKfCwkLU1rbl4sDdcXBwQI8ePVrdRqVSITo6Gm+88Qaef/55PPLIIzh06BCcnZ3Rq1cvPPvss/Dz88OECROwZcsW9O3bF++99x5mzZqFDRs24O9//ztiYmKwdetWHDlyBPHx8QCa/hicPn06srKy4O/vj9zcXKSnp+OHH37o0M9o0KBbvnx5i+u3bdum+X7w4MFsndFd+UNIIAoXBra6zV/XbMUXJ+9+3xGvf6O1PCkOePPP9x7Ot9jZ2cHLywteXl5QqVQoLy/HlStXoFQqUVpaCl9fX3h7e0Mq5aRFZD4kEgnGjh0LAFAoFLh48SK6du0KAIiIiEBhYSEqKyvh6+uLvn37AmgaRf/OO+8AAPbu3Yv169cDAOLi4hATEwMAOHXqFI4fP47k5N//jZWX3/v1+ztxmBhZpcVTkrFYx2sD5mxFSRv3s/YIsPbIVs1yYQe0SG+RyWTw9vaGt7c3qqqqUFJSAqVSiStXriAoKEirJUikr9VlSLdGGt/63t7eXuu1xsbGZt2rbeluFUIgNDQUeXl5HVrvnRh01OkcaCGs4uZsRVvG8vaY83vozR7qgtlDEzukJjc3N7i5uaGiogLFxcUoKCiAl5cXAgMD2boji9C7d2+UlpYiLy8PCoUCq1atwuDBgwEAiYmJWL9+Pd566y3k5eXh2LFjAJpag9XV1di1axeGDBkCIQTy8vIQFxfXobUx6IgAHLkj/B6asxX6ej7f33kd7+9sCr78eQ/CyantUxLp4uHhATc3NyiVSly9ehU3btxASEgIHBwc7nnfRIZkb2+PTz/9FOnp6aivr0dAQABWrVoFAJg3bx4mTZoEuVyOyMhIzTU6W1tbbN68GTNnzsTzzz+PhoYGpKSkdHjQGWXUZUfjqEsypsZGNcJe/brN23dU92ZZWRmKioogkUgQFhYGFxeXDtkvWQZzGnVpTsxu1CWRNbCxkWqF1+3dly259fq6J6MxMDSo3cf19PSEk5MTCgoKcPr0aYSEhMDd3b3d+yPqrBh0RHepraH3+CfHABxDHwBft7OV5+joiIiICBQUFODcuXMIDQ1Fly5d2rUvos6KV7mJ7kHhwmQULkxGbCu3v51AUyD21dMS1MXOzg7h4eGws7PD2bNnUV1d3b5iiTopBh1RB9jyVrIm9HQpR1PgPdiOwLO1tUV4eDhsbW1x9uxZPp+M6C4w6Ig6mL7AO42mwJv72d0Fnp2dHcLCwiCEwJkzZ7Tm0iQi3Rh0RAaiL/DW/6x/YMudHB0d0aNHD9TW1qKwsPAeKyTqHBh0RAamL/B6zNl6V4Hn4eEBX19fXLt2DVevXu2IEonuSk5ODoYOHdrqNm+88YaRqtGPQUdkJG0JvNc2tC3wunfvDicnJxQXFxtkol+ie8WgI+rEChcm4+CLg1p8be2
RtnVnSiQS9OzZs2l/hYV39YBYovZYv349wsPD0a9fP2zZsgUAcPjwYQwYMABxcXFQKBT49ttvAQDPPfccVCoVFAqFpuX37LPP4r777kN0dDQef/xxow6o4swoRCYUOmcrdA0pacsMK5cuXYJSqURQUBC8vb07tjgyqTtnAFGpBa5U6w8HtRDIv1AFeXc3SNswo4q3qz1keh4+XVpair59++Lw4cPw9fVFamoqKisrsWnTJjg6OsLW1hYlJSUYNGgQzp49C6D5c0jLysrg6ekJAJg5cyYiIyMxY8YMvfXdqT0zo7BFR2RCZ1vpzuwxZyv66Wnd+fj4wMnJCSUlJWhoaDBEiWRh8i9U4fX/Hkf+haoO2+cPP/yAgQMHws/PDxKJBJMnTwYAXL9+HY899hiioqKQnJyM4uJindeNt2zZomnRbd68WTOxszFwZhQiM1C4MLnFLssyNAWerjCUSCQIDg7GiRMnoFQqNd2ZZH1kUgl8u+if3NvH1R5dXeygCHCHVE9L7V7NnTsXffv2xYYNGyCRSODp6dniNePCwkK8/vrrOHz4MLy9vbFkyRL8/PPPBq3tdmzREZmJwoXJeCKh5ddau27n5OQELy8vlJeXo6amxkDVkaWQSiXoG+TRoSF3//3348CBAygtLYUQAuvWrQMAVFZWwt/fHxKJBF988YXWQ1OdnJxw48YNAEBVVRUcHR3h4eGBmpoarF27tsNqawsGHZEZmZfSelemLt27d4dUKoVSqTRUadSJ+fr64t1338WgQYMQHx+PwMBAAMDLL7+Mt99+GwqFAnv27EFQ0O+TmD/zzDPo168fhg4dipiYGCQlJSEiIgJDhgxBQoKOv+gMhINRiMyUrmDTFYQXLlzAxYsXERYWxomfrQAf09MyDkYhsiJ327Lr1q0bbGxscOHCBUOWRWRxGHREZuxuwk4mk6Fbt26oqalBZWWloUsjshgMOiIzV7gwGX1aWN9S2Hl7e8PGxgYXL140fGFEFoJBR2QBvl6YjFlDnJutvzPsZDIZfHx8cOPGDVRVddx9VGQ6FjiMwqDacz4YdEQW4rlhSfi/GbHN1t8Zdj4+PpDJZCgtLTVWaWQAEokE9vb2KCsrg1qthhCi03+p1WqUlZXB3t7+rgbo8IZxIgvyPz0DsGJqPdJXn9Baf/tN5TKZDF5eXrh06RJu3rwJR0dHU5RKHSAwMLDV2UY6I3t7e83tDW3FoCOyMEN7h2DqfSewOld7/e1h5+Pjg0uXLuHy5csIDg42QZXUEWxtbRESEsLuy9u051YLdl0SWaC/PZKMXi2sv9WNaWdnBw8PD5SVlWlNrEuWSSKR8Ou3r/Zg0BFZqB16bj3w8fGBEAJXrlwxZllEZodBR2TBdN1n91HOAbi4uMDJyQlXrlxh1xd1agYNupycHERGRiIsLAzp6elQqZo/eWvx4sWIjIxETEwMHnroIVy6dMmQJRFZnZbCbvH2awCa7qtraGjgrQbUqRks6NRqNdLT0/H555/jzJkzqKqq0sx4fUtBQQGWLVuGH3/8EUePHoVCocDixYsNVRKR1Wop7HrM2QoPDw9IpVKO2qNOzWBBl5ubi+7du0MulwMApk2bhqysLK1thBBoaGjAzZs3IYRAVVUV/Pz8DFUSkVVrKexC525H165dUVlZyQezUqdlsKBTKpVa9zoEBQWhuLhYa5vw8HA8++yzCAwMhJ+fH44fP45Zs2YZqiQiq+fawrqcy5chhEBZWZnR6yEyBwYLurZc/C4rK8OmTZtw5swZlJSUICQkBIsWLWq2XWZmJuRyuearoqLCECUTWbxjLbTq/pZ1GY6Ojuy+pE7LYEF3647+W4qKihAQEKC1zXfffYeQkBD4+flBJpPh0UcfxcGDB5vtKyMjA/n5+ZovDw8PQ5VNZPFa6sL8039+RV1dHa5fv26CiohMy2BBFx8fD6VSqXkQ3sqVK5GSkqK1TXBwMH744QfNiLAdO3ZorukRUfvpuu2gvLzcyJU
QmZ7Bgk4mk2HFihVITU1FaGgoXFxcMGnSJGRnZyM9PR0AkJCQgLS0NCQkJCA6Ohrnz5/HnDlzDFUSUaf2yOelqKio4D111OlIhAX+1Lf2yHQi+l1Lz6z7+eWB6NKliwmqITKc1nKBM6MQWbGdz/Vvto7dl9TZMOiIrFhYt67N1v1xeX6LsxQRWSsGHZGVa2lgSmVlpQkqITINBh1RJ9T33e9NXQKR0TDoiDqBllp17L6kzoJBR9RJ9LhjOXTudlOUQWR0DDqiTiJHx03kRNaOQUfUidzZhdnSfXZE1oZBR0REVo1BR9TJfPXsfVrLbNWRtWPQEXUyUf4+pi6ByKgYdESdkNMdy2zVkTVj0BF1QvkcgUmdCIOOiAAAVdfrTF0CkUEw6Ig6qTtvNYiZv9NElRAZlt6gmzp1Kr7/nvPiERGRZdIbdA8++CDmzp2LqKgo/POf/0RZWZkx6iIiI+AN5NQZ6A26CRMm4LvvvsOWLVtw5coVxMXFYfz48cjJyTFCeURERPemzdfozp07h4KCAjg7OyMiIgJvvvkm0tLSDFkbERHRPbPRt8Gbb76JtWvXIiYmBjNmzMCwYcM0r/Xq1cugxRGR4Z17ewRCXvlas9xjztYWH+tDZKn0Bp1UKsXevXvh6+vb7LVvvvnGIEURkfFIpRx8TdZN7094QUFBs5CbMmUKACAkJMQgRREREXUUvUF39OjRZusOHz5skGKIyDROvj5Ea5mjL8ma6Oy6XLJkCT788EMolUqEh4dr1ldXV2P48OFGKY6IjMPBwcHUJRAZjM6gmzx5Mh5++GE8//zz+Oc//6lZ7+rqiq5duxqlOCIionslEUIIUxdxt+RyOfLz801dBpFVuXbtGhQLD2it4+hLshSt5YLOFt2YMWOwefNm9OrVCxKJRLNeCAGJRILTp093fKVEZDKurq6mLoHIIHQG3bJlywAAO3dyoleizkAmk5m6BCKDYNclEWmUlpbif97XHlXN7kuyBO3quryzy/KWu+m6zMnJQUZGBurq6pCUlITly5c3+6vx0qVLmD59Ok6dOgUhBN577z2MHDlS776JqOO5ubmZugSiDqcz6O61y1KtViM9PR3Z2dmQy+UYN24c1q1b12x+zLS0NDzxxBMYN24cGhsbUVlZeU/HJaL2c3R0NHUJRB1OZ9AFBwff045zc3PRvXt3yOVyAMC0adOQmZmpFXSnTp3CpUuXMG7cuKZibGzg6el5T8clovaTSCTYPUOOPy7npQGyHjpnRhkzZgyApi7M8PBwzdetZX2USiUCAwM1y0FBQSguLtba5uTJk/Dx8cGECRMQFxeHyZMno7y8vJ0fhYg6wp3dl5wlhSydwUZdtmWMS2NjI/bu3Yvc3FxERUVh3rx5+N///V+sXLlSa7vMzExkZmZqlisqKtpVExHpx+t0ZG10tuj8/PwANHVhOjs7Iz8/H/n5+XBxcWlTt2ZgYKBWC66oqAgBAQHNtpHL5YiKigIAjB8/vsV5NDMyMjTHz8/Ph4eHR9s+HRHdNVtbWzR/VgmR5dI7qfOGDRsQHR2NFStW4JNPPkFMTAw2btyod8fx8fFQKpWa4Z4rV65ESkpKs23q6+s1gbhjxw5ERka253MQUQf6PCNGa5ndl2TJ9D6Pbt68ecjNzdW0xkpKSjBkyBDNABJdZDIZVqxYgdTUVNTV1SExMRGTJk1CdnY2srOzsWLFCkilUixduhSjR49GY2Mj/P39sWrVqo75ZETUbpwlhayJ3qBzcXHR6nL09/eHs7Nzm3Y+ePDgZjfwjRo1CqNGjdIsP/DAA/jpp5/aWi8RGQGDjqyJzqA7ePAgACAxMRETJkzA5MmTIZFIsHbtWiQlJRmrPiIyAZlMhu1TQvHQmrOmLoXonukMurlz52otv/vuu5rvL168aLiKiMgstHSbAacDI0ukM+h2795tzDqIyMyw+5Kshd5rdABw+vRp/PLLL6itrdWsmzhxosGKIiLTa+u1eCJzp/f2gvf
eew+PP/44pk+fjk2bNuHpp5/Ghg0bjFEbEZmQVCrFjmn6Z0EiMnd6g27NmjU4cOAA/P398cUXXyAvLw9qtdoYtRGRiXE6MLIGeoPOwcEBtra2kEgkaGxsRM+ePVFYWGiE0ojI1HidjqyB3mt0Hh4eqKqqwuDBg/Hoo4/Cy8sLXbt2NUZtRGRifGwPWQO9Lbrs7Gy4uLhg0aJFGDt2LGJiYrBlyxZj1EZEJiaRSPDd9D6mLoPonrSp67K8vBzffvstvL29MXHiRLi7uxuhNCIyB7xOR5bOYJM6E5F14HU6snQGm9SZiKyDvb29qUsguid6W3T3MqkzEVmHPU9HmboEonbjpM5EpBfnvSRLxkmdiUgvXqcjS8ZJnYlILxubNk2LS2SW9F6jU6vVWL58OcaPH4/x48fjk08+4RRgRJ3Q/mdiTV0CUbvoDbrZs2dj+/btmDBhAiZOnIjt27dj9uzZRiiNiMwJ76cjS6W3P2LPnj34+eefNcsjR46EQqEwZE1EZIZcXFxMXQJRu7Sp67KqqkqzfP36dQghDFoUEZkfqVTvrwsis6S3RfeXv/wF8fHxGDt2LABg8+bNeP755w1eGBGZn+9n9UX/D34ydRlEd6XVP9GEEBg1ahQ+//xz+Pv7IyAgABs3bsSMGTOMVR8RmRFepyNL1GqLTiKR4KGHHsKxY8cQG8sRV0SdnZOTk6lLILprejvdIyIicPr0aWPUQkRmTiKRmLoEorum9xpdSUkJYmNj0bdvX605Lr/99luDFkZE5unQ8/ch4R+5pi6DqM30Bt2CBQuMUQcRWQjOe0mWRm/QJSYmorGxEQUFBQCAXr16cTogok7MwcHB1CUQ3RW9iZWTk4PJkyfD29sbQgiUl5fj3//+NxITE41RHxER0T3ROxjlmWeeQXZ2Ng4fPoyffvoJW7ZsQUZGhjFqIyIz9dOL/U1dAlGb6Q06W1tbrSm/YmNjYWtr26ad5+TkIDIyEmFhYUhPT4dKpdK5bXJyMsLCwtq0XyIyrTsf23P52g0TVUKkn96gGz58OObMmYMTJ07g5MmTeOWVVzBixAhcuHABFy5c0Pk+tVqN9PR0fP755zhz5gyqqqqwbt26Frddv349unbt2v5PQURGdecfuwkLc0xTCFEb6L1Gt2HDBq3/3vLZZ59BIpHg3LlzLb4vNzcX3bt3h1wuBwBMmzYNmZmZSEtL09ru6tWryMzMxKpVqzBy5Mh2fQgiIiJd9Abdr7/+2q4dK5VKBAYGapaDgoJQXFzcbLvZs2dj/vz5HMlFZGF+fnkgYhfsN3UZRHoZbDrytjzh4Ouvv4ZMJsPgwYNb3S4zMxNyuVzzVVFR0VFlElE78bE9ZCkMFnSBgYFaLbiioiIEBARobbN3717s2rULPXr0wMCBA3H+/HnExMQ021dGRgby8/M1Xx4eHoYqm4jaSCaTaS1zgmcyVwYLuvj4eCiVSuTn5wMAVq5ciZSUFK1tFixYAKVSicLCQuzfvx/BwcE4evSooUoiIqJOyGBBJ5PJsGLFCqSmpiI0NBQuLi6YNGkSsrOzkZ6ebqjDEpERDfY2dQVE+kmEjotpvXr1anGmciEEJBKJSZ9oIJfLNS1FIjIdIQR6vrxNs8w5L8lUWssFnaMud+7cabCCiMg63PnHMCd4JnOkM+iCg4ONWQcREZFB6Ay6MWPGYPPmzc26MM2h65KIzIc7gGsmroGoNTqDbtmyZQDYhUlErctbmMxbC8is6Qw6Pz8/AOzCJKK7w+t0ZG703l6Ql5eHP/zhD3Bzc4OdnZ3mi4iIyBLonevy6aefxtKlS/HEE09g3759+Ne//oWGhgZj1EZEFsIXQKmpiyDSQWeL7sCBAwCA+vp6xMXFobGxES4uLvjrX/+KrKwsoxVIRObv/7GrksyYzqB79tlnAUDTTRkcHIwNGzbgwIEDqKysNE51RGSRODiFzInersu//e1vqKysxOLFi/GXv/wFVVVVWLJkiTFqIyIiumc6g66
oqAjTp08HAGzatAkAEBoaqll+6KGHjFAeEVmKlHBgE2+vJTOkM+icnZ0xYMAAY9ZCRBbsH08kY9NtXZbvbP0WLyU/aMKKiJroDDpPT0+kpaUZsxYisiLL9jXgJY5RITOgczBKW54QTkREZO50Bt2+ffuMWQcRWQHOiELmSGfQubi4GLMOIrJCvM2AzIHBnjBORERkDhh0RERk1Rh0RNSheJ2OzA2DjogMitfpyNQYdEREZNUYdEREZNUYdETU4e68TsfuSzIlBh0REVk1Bh0REVk1Bh0RGcSd3ZdL935vokqos2PQEZFRvLut3NQlUCfFoCMiIqvGoCMig+EsKWQODBp0OTk5iIyMRFhYGNLT06FSqbRez8vLw4ABAxAZGYmoqCh8+OGHhiyHiEyMtxmQKRgs6NRqNdLT0/H555/jzJkzqKqqwrp167S2cXJywqpVq3D8+HEcPHgQS5YsQV5enqFKIiKiTshgQZebm4vu3btDLpcDAKZNm4asrCytbcLDw9G7d28AgJubG/r06YPi4mJDlUREJjD/0UBTl0CdnMGCTqlUIjDw9x/woKCgVkPs7Nmz+PHHHzFgwABDlUREJvB4vxitZXZfkrHZGGrHQog2b3vt2jWMGTMGH3zwAbp27drs9czMTGRmZmqWKyoqOqRGIiKyfgZr0QUGBmq14IqKihAQENBsu5qaGiQnJ+PJJ5/Eo48+2uK+MjIykJ+fr/ny8PAwVNlEZACJnqaugDozgwVdfHw8lEol8vPzAQArV65ESkqK1jYNDQ1ISUnBsGHDMHPmTEOVQkQm9u//5STPZDoGCzqZTIYVK1YgNTUVoaGhcHFxwaRJk5CdnY309HQAwMaNG7Fjxw5s3rwZCoUCCoUCX3zxhaFKIiKiTkgi7uZimpmQy+WaliIRWYaPcg5g8fZrmmVHACd4Qzl1kNZygTOjEJFRPJOkPaL6ponqoM6HQUdEJmOBHUpkgRh0RGQ0d8592fPlbSaqhDoTBh0RmRRbdWRoDDoiMim26sjQGHREZFR8dA8ZG4OOiEzuo+++M3UJZMUYdERkdHe26hZ/y5sNyHAYdERkFtbn5pq6BLJSDDoiMok7W3Vzsy5zBCYZBIOOiMzGzEyOwKSOx6AjIpO5s1X3XyWgUqlMVA1ZKwYdEZmV0LnbTV0CWRkGHRGZVEv31dXU1JigErJWDDoiMjvyN3ZzYAp1GAYdEZlcS626FTk5xi+ErBKDjojM0lvf1ODGjRumLoOsAIOOiMxCS626yDdz0NDQYPxiyKow6IjIbLQUdr1e+5bX6+ieMOiIyOwVFRWZugSyYAw6IjIrLbXqEpf9ggsXLpigGrIGDDoiMjsthd0fPjyC0tJSE1RDlo5BR0Rm6dCcpGbr/uf9w1AqlcYvhiwag46IzJKPuzOiJc3XD/zoZyz7+muo1WrjF0UWiUFHRGbrvwuSEdDC+nf2qBHyyteora01ek1keRh0RGTW9i9MxuuP+Lf4WsTru1BaWsrbD6hVDDoiMntT7lO0OEAFaLpu1/PlbaisrDRyVWQpGHREZDF0hR0AxC7Yjx5ztqKiooItPNLCoCMii1K4MBnJwbpfj3vnIHq+vA2PzNnKx/0QAAMHXU5ODiIjIxEWFob09PQWnxy8ceNGhIeHIzQ0FHPnzjVkOURkJTKfTkbhwmR4tLLNYTQ97qfHnK3oMWcrzpSUcN7MTkoiDNTGV6vVCA8PR3Z2NuRyOcaNG4fk5GSkpaVptqmsrER0dDR++OEHeHt7Y9CgQViwYAESExNb3bdcLkd+fr4hyiYiC/Tuth1Yurf+rt/nJgO+np2Ari4usLOzg0wmM0B1ZAyt5YKNoQ6am5uL7t27Qy6XAwCmTZuGzMxMraDbvn07kpKS4OfnBwBIS0tDVlaW3qAjIrrdi38ahhf/1PR9xJytaOtNB1UqYMB7hzq0FmcZ4O0iQVWDFFnTFfB0ctK5rUTSwo2CHbCtpXF2djbo5zNY0CmVSgQGBmq
Wg4KCUFxcrHeb7du3G6okIuoETt4xYKXHnK1GPf4NFXCjUgBQYcj7hxHoDDQAGBZij/IagXMV9ahRAX28bGErk8HHSYpL11WoqBPo7WkDqUSCi1UNKL4uMCbCEXYyGZRVKgS62UAl1Dh8oQ4+zjYIdm/aVgIJVEKt2UaqIzDUQqD8phoJ/vY6tzGV2NhY2NgYLI4MF3Rt6RFta69pZmYmMjMzNcsVFRXtrouIOhddIzWfXb4V//2144/nbAP4OEtwqQaoaRA4/9uzY9f9UgfVbb/yfq1sgAQNcLSVoqahaZaXw5caIZNIUF3XNJ7B39Md4/r6YtmRAsxPDsaZKzXIPn0Wbo42eD+lD2L8XQEAP5dUYdmRM3gzOQgx3V1brOvohWos3XoGkaEBOrcxFanUsOMiDRZ0gYGBWi24oqIiBAQENNvm6NGjrW4DABkZGcjIyNAs3+oOJSJqryUzkrHEgPtvbFTjPz/8im+PluBGA/DXYeE4V34DP567ims1jRjSxxe2tlKE+bjg9KVqlFTcxODe3SCRAvkXK/FzcSUWjomBnZ0MH7i5QhHgjgFqAQcnR4T7uKBfcFdIpU0ts0Fd3PGBmxsUAe6adXca5O6hdxtrZbDBKCqVCr169cJXX32lGYwyYsQITJ06VbNNZWUloqKicOjQIc1glLfeegt//OMfW903B6MQEdHtWssFg7UXZTIZVqxYgdTUVISGhsLFxQWTJk1CdnY20tPTAQBdunTBokWLMGjQIPTu3RuJiYl6Q46IiOhuGKxFZ0hs0RER0e1M0qIjIiIyBww6IiKyagw6IiKyagw6IiKyahY5GMXNza3F++0sQUVFBTw8WpuKlnThuWs/nrt7w/PXfsY6d0qlElVVVS2+ZpFBZ8k4YrT9eO7aj+fu3vD8tZ85nDt2XRIRkVVj0BERkVVj0BnZ7XN20t3huWs/nrt7w/PXfuZw7niNjoiIrBpbdEREZNUYdEZQXFyMIUOGoE+fPoiMjMTLL79s6pIsUkZGhkEfzmitbty4gbS0NPTu3RsRERFYvny5qUuyGOvWrUNMTAwUCgUeeOABnDp1ytQlmbVZs2YhICCg2b/TOXPmICwsDOHh4cjKyjJ6XQw6I7CxscE777yDEydO4MiRI9i/fz+2bNli6rIsyr59+3D9+nVTl2GRXnjhBURGRuLUqVM4ceIExo4da+qSLEJNTQ1mzZqF7777Dnl5eXjsscfw6quvmross/boo4/ixx9/1Fq3c+dOHDx4EKdOncLu3bvx3HPPGf3fMoPOCPz8/BAfHw8AsLOzQ1xcHIqKikxcleWoq6vDnDlzsHjxYlOXYnGqq6uRnZ2N559/HgAgkUjg4+Nj4qosg1qthhBC80u5srISfn5+Jq7KvA0cOBC+vr5a67KysjBlyhTIZDL4+/tjwIAB+Pbbb41aF/uBjKy8vBybN282+v9oS/bGG29g2rRp8Pb2NnUpFufcuXPo1q0bnnnmGRw6dAjBwcF4//33ERwcbOrSzJ6Liws++ugjREVFoUuXLujSpQu+//57U5dlcZRKJVJSUjTLQUFBKC4uNmoNbNEZUX19PVJTUzFr1ixERESYuhyLcPToUfzwww9aT6antmtsbEReXh5SU1Px008/4eGHH8YTTzxh6rIsQkNDA5YuXYrc3FyUlJQgNTUVL730kqnLsjjmMLCfQWckKpUKEydOhEKhwAsvvGDqcizGgQMHkJ+fj549e6JHjx5QqVTo0aOHzjntSFtAQAA8PT0xdOhQAMD48eNx+PBhE1dlGfLy8iCEQJ8+fQA0nbuDBw+auCrLExgYqNWCKyoqMvpcxQw6I5k+fTpcXV3x3nvvmboUi/L000/jwoULKCwsRGFhIWQyGQoLC+Hm5mbq0ixCt27dEBkZiZ9++gkAsGPHDkRGRpq4KssQEBCAU6dOoaSkBEDTuZPL5SauyvKkpKRgzZo1UKlUKCkpwf79+/Hggw8atQZeozOCAwcOYNWqVYi
KikJcXBwA4IknnsDMmTNNXBl1BsuWLcO0adNw48YNuLu7Y8WKFaYuySL4+flh4cKFGDZsGGxtbeHt7Y1Vq1aZuiyzNmPGDGzduhUqlQoBAQEYPXo0MjMzsWPHDoSHh0MqleIf//gHXF1djVoXZ0YhIiKrxq5LIiKyagw6IiKyagw6IiKyagw6IiKyagw6IiKyagw6IiKyagw6srpH32zevBlHjx5t13sffPBBzQ3CPXr0gFKpbHcdCxcuRGhoKCQSSbP9fPDBB+jVqxfCwsKQmZnZ7mO015QpU7Bu3TqjH7ezW7hwIf7zn/+YuoxOh0FHRtHY2Gi0fbU36Hbt2gV/f3/4+/u3tzQtQ4YMwXfffddsAuWCggIsXboUR44cwZEjR/D+++/j119/7ZBjtqQjz70l19AaY9WXkZGBRYsWmcX8j50Jg4602NjY4I033oBCoUBMTAxOnz4NIQR69uypaekAQHJysuYJDEuWLEFCQgJiY2ORnp6OhoYGAE0tojlz5iA+Ph5LlizBl19+qXmIZUxMDM6fPw8AyMnJwcCBA9GvXz88+OCDLc5svmbNGowcORLDhw9HVFQUACA1NRXx8fGIiorCc889BwDYvXs3srOzMXfuXCgUChw8eBA3b97EU089hYSEBERHR+Ojjz5q8bP/+9//Rmpqaouv7d+/H/Hx8YiJiUFycjJKS0sBND2N4k9/+hMiIyPxyCOP4P7778f+/fsBAPfdd1+LTwnYtGkT/vznP8PFxQWurq5ITU3Fl19+qbWNWq1G9+7dUVdXB7Vaja5du2pafq+//rrmM3z55ZeIjY1FdHQ0Jk6ciOrqagBNLbYZM2agf//+SEtLQ21tLSZNmoSIiAgMHz4cV65c0Rxr3rx5iIyMRExMDIYNG9bi57exscErr7yC6Oho9O3bF8eOHdPUOXfuXCQkJCAmJgavvPKK1nteffVVxMXFYdOmTVr7O3z4MAYMGIC4uDgoFAqtp3noOtaaNWvw8MMPY+jQoejduzfS0tJQX18PADh//jxGjhyJ+Ph4xMfHY8+ePW06zu31rV69GgkJCYiLi0NSUpLmj4+cnBwMGDAAEydOhFwux4gRI1BbWwsAuHr1Kv785z8jJiYGMTEx+OyzzwA0TUY+ePBg9OvXDwMHDtR8BldXV4SGhmLfvn0tnmcyEEGdnkwm03wPQHz22WdCCCEWLVok0tPThRBCvPTSS2Lx4sVCCCGuXr0qAgICRGNjo9i1a5d4/PHHhUqlEkIIkZGRIZYuXSqEECI4OFi89tprmn1HR0eLCxcuCCGEqKmpETdv3hRlZWWif//+4tq1a0IIITZu3CjGjRvXrMbVq1cLLy8vUVpaqll39epVIYQQKpVKjB49Wmzfvl0IIURaWppYu3atZrvXXntNLF++XAghRG1trejXr584fvx4s2P06NFDa//BwcGiuLhY1NbWioCAAHH48GEhhBCLFy/W1Dhz5kzxyiuvCCGEyMvLEzKZTOzbt09rv7f2c8szzzwjPv74Y83y0qVLxezZs5vVM2rUKLFnzx6Rl5cnEhISxKOPPiqEECIxMVEcO3ZMXLx4Ufj6+orz589r9vviiy9qzsHgwYNFfX29EEKIf/zjH2LChAlCrVaLoqIi4ebmJtauXSvKyspEnz59NP//ysvLm9UhRNPPRWZmphBCiM2bN4t+/foJIYRYuXKl5vOrVCrx8MMPi23btmnes3Llyhb3V1lZqalNqVSKkJAQvcdavXq1cHd3F0qlUqjVapGSkiKWLFkihBBi8ODB4pdffhFCCHH+/HnRs2dPoVar9R7n9vpu/TwJIURWVpYYP368EEKI3bt3CycnJ3H27FkhhBDJycli3bp1QgghJk6cKN544w3N+8rKykR9fb24//77hVKpFEIIcejQIZGQkKDZZv78+WL+/PktnhcyDOu6OEP3TCKR4JFHHgHQ1CLZvn07AOCxxx5DWloaXnjhBWzcuBEpKSmQyWTYtm0b9u7
di759+wIAamtr4ejoqNnfY489pvk+KSkJjz/+OMaMGYPRo0cjKCgIO3fuxKlTp5CYmAigqYWgax68oUOHolu3bprljz/+GBs3boRKpcLly5cxcOBADB8+vNn7tm3bhps3b2Lp0qUAgKqqKpw6darZBL0XL15s8Zl3J0+ehK+vr+YzTps2De+88w4AYO/evVi/fj0AaFpW+og2dlslJiZiz5496NKlC5588klkZmaitrYWZ86cQWRkJLKzszFw4EAEBQVp6po+fbrm/ePGjYOtra2mzieffBISiQSBgYEYPHgwAKBLly5wdnbGlClTMHz4cDz88MM660lLSwMAjB49GlOnTsWNGzewbds2/Pzzz9i6dSsA4MaNGygoKMCIESMAABMnTmxxX9evX0d6ejry8/NhY2OD4uJiXL16FV5eXjqPBTT9DNzqWp48eTLWrVuHKVOmYP/+/Vo/a/X19bh8+TJUKlWrx7m9vlOnTmHu3Lm4evUqVCoVpNLfO7z69u2LkJAQAE3/Lm619r755hssW7ZMs13Xrl3xyy+/4Pjx40hOTtasLy8v13zv4+ODvLw8neeZOh6DjrRIpVLNL0eZTKa5dhEdHY3GxkacPHkSn376qeYpDEIIPPfcc5g9e3aL+3N2dtZ8/+GHH+LIkSPYsWMHEhMTsW7dOggh8MADD2Dz5s16a7t9X3v27EFWVhb27t0LV1dXvPDCC5rupDsJIbB+/XooFIpW9+/g4IC6ujqtoAaawr+15ba+dktbH1uSlJSEF198EV26dME777yDXbt2Yc2aNUhISIBEItFb1+3nSxeZTIaDBw9i7969+Oabb/Dqq68iLy8PXbp00fteoOncLlq0CGPGjGlx3w4ODi2+b+7cuejbty82bNgAiUQCT09Pnf//btfS+VWr1XBycmoxPKZOnarzOHfW99hjj+HTTz9F//79cezYMYwdO1bzmr29vdbnuv2a3p01CSEQGhqqM8zu/GOQDI/X6KjNJk6ciAULFuDy5ctISEgAAIwYMQKrV6/GtWvXAAAVFRU6B1acPn0acXFxePHFFzFs2DDk5eWhf//+OHToEH755RcATQ+7vHU9ozWVlZVwd3eHq6srysrKkJWVpXnN1dVV63l1I0aMwAcffACVSgWgaTBIS8+zi4yMREFBQbP1vXv3RmlpqeYX16pVqzQtokGDBmmuyxw7dqxNg2DGjh2LDRs24Pr166iursYXX3yh9Uv1FoVCgfz8fJw9exZhYWFISkrCggULkJSUBABISEjAgQMHNCM6V69eranrTomJifj0008BACUlJdi9ezcAoLq6GmVlZRgyZAgWLlwIBwcHnSNN165dCwD46quvEBISAmdnZ4wYMQLLli3ThMeFCxc01y9bU1lZCX9/f0gkEnzxxRdaLR5dxwKAnTt34uLFixBCYN26dUhMTISbmxsiIyO1nixw67FE+o5zu6qqKk1r8eOPP9b7GQBg+PDh+OCDDzTL5eXliIiIQHV1NXbt2gWgKfiOHDmi2eb06dOa68xkHAw6arOJEydi7dq1GD9+vGbd0KFD8dRTT2HQoEGIiYnBkCFDdP6ifPHFFxEVFQWFQoFLly7h8ccfh5eXFz777DOkp6cjNjYWCoUCe/fu1VvLQw89BBcXF/Tu3RupqakYNGiQVp0fffSRZjDKq6++ChcXF8TGxiIqKgpPPvmkZhDD7UaNGqX55XQ7e3t7fPrpp0hPT0dMTAx27NiB999/H0DTQI7c3FxERkbi73//O6KiojStofnz5yMgIABKpRL33XcfRo0aBQAIDw/HU089BYVCAYVCgZkzZ2q6xW4nlUrRt29fzS/FxMREFBUVabp5fX198dFHHyE5ORnR0dG4fPky5s6d2+L5euqppyCRSBAREYEnnngCAwYMANAUBKNHj9YMphg9enSLz6uTyWQoLi5GTEwM5s2bpwmVadOmoX///oiPj0d0dDRSUlI0f/S05uWXX8bbb78NhUKBPXv
2aLpfWzsWAAwYMEAzqMbR0RFPPvkkAGD9+vWagTlyuVwzWKe149zp3XffRWJiIvr16wcPDw+9nwFouk3k6NGjiIqKQmxsLHbs2AFbW1ts3rwZ8+fPR2xsLCIjI7X+ENu7dy8eeuihNu2fOgYf00P0mytXrmD06NE4cOBAm7oggaZrQRKJBLa2tigoKMDgwYNx+vRpq+uasrGxMdoQfF3HWrNmDfbv32/Rz9P7/vvvsXTpUk2LlYyD1+iIfuPt7Y2XXnoJpaWl8PPza9N7Ll68iLFjx0KlUkEIgX/9619WF3LUccrKyvD222+buoxOhy06IiKyarxGR0REVo1BR0REVo1BR0REVo1BR0REVo1BR0REVo1BR0REVu3/AynPb0Q3AVUZAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 390;\n", + " var nbb_unformatted_code = \"surv_model.plot(color=\\\"gray\\\", alpha=0.4, label=\\\"model\\\")\\nsurv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (log10 words per appearance)\\\", ylabel=\\\"Tail probability\\\")\";\n", + " var nbb_formatted_code = \"surv_model.plot(color=\\\"gray\\\", alpha=0.4, label=\\\"model\\\")\\nsurv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(xlabel=\\\"Inverse rate (log10 words per appearance)\\\", ylabel=\\\"Tail probability\\\")\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "surv_model.plot(color=\"gray\", alpha=0.4, label=\"model\")\n", + "surv.plot(marker=\".\", ms=1, lw=0.2, label=\"data\")\n", + "decorate(xlabel=\"Inverse rate (log10 words per appearance)\", ylabel=\"Tail probability\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "With the y-axis on a linear scale, we can see that the model fits the data reasonably well, except for a range in the middle of the distribution -- the words that are not common or rare.\n", + "\n", + "And here's what the model looks like on a log-y scale." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 391, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAboAAAD/CAYAAACHFRPuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAA7x0lEQVR4nO3deVxU9f748dcsLCIggqTAsIkLgSAgUoaF12zxqyYRlmmGJqY3Sq1uZVndtmt797aMZWnaFSszDf1p3ywtNLUFTRRDxQUEVFKWwI1t5vz+8OvkKJtsMwzv5+MxDzln5pzzngPOez67SlEUBSGEEMJGqS0dgBBCCNGWJNEJIYSwaZLohBBC2DRJdEIIIWyaJDohhBA2TRKdEEIIm2bTiS42NtbSIQghhGgHDX3eq2x5HJ2TkxMBAQGWDkMIIUQby8vL4+zZs3U+p23nWNpFWloaaWlpeHh4kJ2dbelwhBBCtLGQkJB6n7PpEl1ISIgkOiGE6AQa+ry36TY6IYQQwiarLoUQwpbYcMXbFVOpVFd8TIdJdGfPnuWBBx7AycmJfv36MXv2bEuHJIQQbaqmpoaCggKqqqosHYrVcHBwwNfXFzs7uyYfY7FEN2vWLFauXElRURG1tbWm/enp6aSkpFBVVcWwYcNYsGABGo2GVatWMWbMGO644w7uvPNOUlJSruiNCiFER1NQUICLiwsBAQHNKsnYGkVRKCkpoaCggN69ezf5OIu10Y0bN47t27eb7TMajSQnJ7NixQoOHjxIRUUFqampwPlfuJ+fHwCenp4UFxe3e8xCCNFeFEWhqqoKDw8P1Go1KpWq0z/UajUeHh5UVVVdUXWuxUp0Q4cOvWxfRkYG3t7epm6iU6dORa/Xk5SUhK+vLwUFBQwePJji4mJ69OjR5jFm7slme+EZervbo67n25SK8/trjUa25J0hNsCJI2U1KEAfDwfUKhVGReFwaTUB3e04UlZDoLs9eWU1BLrbcai0GhXQ292e3LIaArvbkVtWQ5C7PRqVilqjkU25p/F20aLRqAnsbscPB8r5raia+2Nc+SijglnXeeCo1bLv5Dl2HqskvJc9mccqURQFRVEoqzRgqDVQcMpAlK4rd4V358fc0xw/VQNADyc1+09WUlFlxNVBg5ujmryyKk5XKhhUKv4W4MjPhVX0cdey72Q1RiNU1oJKDZ4udgzysmfToTOcqoLTteAAnAbsgBrTfQJH4Fyb/sasz4bk/g0+f+E/b33/qS/+Wa1Wo9Fo0Gg0Zj9feAjbJCU5cx2+ja6wsBBfX1/Ttp+fHwUFBQAkJCSQkpJCeno61113XZ3Vlnq9Hr1eb9ouKytrUTyHS2tYuL2UlCGe9OvhCJd8g7iwpSgKWw6d5b+ZZZScM/DjkTOogJnXedLXw4HDJdUsyChhdLAra/dV/PVvf1dWZZejAm4P6cba/ef3rd1fwYyYHvTxcGBT7hkW7yjD0U6Fo1bNdX5OrN1/CoAnvjnJmRp4a0sJ8SHdeGtLMWdrFb7OAUM9X3a+zjlD/p+17DlRX52/4ZJthU9/P5+eDpdXX/bqk5U1ZJ+sMdtXc8m/F+5VZ0tyACMW7m/za4x1haRbvbCzs8POzg6tVoudnR329vbY29vj4OCAg4ODVPULqzFs2DBeeumlOgs8Fzz33HNotVqefvrpFl/PqhJdQ0VRJycnFi9e3ODxKSkppKSkmAaMb9y4sUXxxA8NJyDAjwidG2p1w98iBocZ6e1XyB0RPuw+Xo5KgUi/7qjVKoYYFcL7/0m4dzdGHSs3+3dkzJ+oFBioczN77sI1hww00tuvgD6eXdGq1YR5dyMqI4/0fSd5KzGcR1fu4f3xkTg6ahnQv4T0vScY2qcHm
3NOYDScL9H9UVFJraGW/SfPMry/F4/e1I8VuwopKD6NSlHRy82R346UUHq6GndnB65ytmfvsQrKzlVTragYH+nD2t+LiPDpxo4jpRgUhTPVtahVanTduzKsXw++2lnAn2er+PMsdNVCcQ10VcGZ//uVagE3FRRL57FWt7oCVn9x/IqO+fWRwTg5OdGlSxcpDQqbZ1WJ7kL15AX5+fnodDqLxaNWq4jy696k12q1au6OOd+GGO3vXu95Lv334tde+tyF8068xt/sfPfFBnFfbBAAn9wXY9p/bWAPrg08X6U7tN9VDcabNCTQbHsqfRp8/fQRwQ0+f//whqvoOpMZ763jm0JLR9GwmLcy6n0u75VR7RiJ6Ai0Wi1PPfUUq1evRqPRsHjxYp555hn27t3LbbfdxptvvgnAli1bmD17NtXV1fj6+rJo0SJ69epFaWkpkyZNIjc3l5CQECorK03n3r17N7Nnz6a8vJwuXbrw/vvvExYW1rrxt+rZWig6OprCwkKys7MJCQlh0aJFJCQkXPF54uPjiY+Pb3BKGCHaygcPtk+iCJizrt3OK8nP8vLy8swSRGtxdHRsdE5gg8FAWFgYL7zwAo888gh33HEHv/76K127dqVv37489NBDeHl5cffdd7N69WqioqJ48803mTVrFsuXL+f5558nPDycdevWsXPnTqKjo4Hzwyfuv/9+Vq5ciY+PDxkZGSQnJ/PLL7+06nu0WKKbPn0669atw2AwoNPpGDt2LHq9noULF5KYmEhVVRVxcXFMmjTpis99oery1KlTbRC5ENbhSpNPSxLjxcdK0ut8VCoVt99+OwAREREcP34cd/fztVHBwcHk5eVRXl5Or169iIqKAs53Jnz11VcB2Lx5M8uWLQMgMjKS8PBwAPbv38/vv//OqFF//U2Vlpa2evwWS3QLFiyoc//w4cNbPD+llOiEuFxDCepKkqAkPcuw5EosarUarVZr+tnBwcHsudra2st6Qzald6SiKAQFBZGZmdmq8V7KJue6TEtLY/LkyVKiE6KJ8l4ZddmjKQLmrGuzKlTRsfTv35+ioiJT0vr4448ZPnw4AHFxcaYSXWZmJllZWcD50uCpU6dMHQcVRWHnzp2tHptVtdG1FinRCdFyFye7xpLZheelhNd5OTg48Omnn5KcnEx1dTU6nY6PP/4YgGeffZZJkyYREhJCaGioqY3Ozs6OtLQ0Zs6cySOPPEJNTQ0JCQlERka2amw2uUzPxcMLLu7FKYRouaaU4CThtZyiKOzbt4/g4GAZNH6R+u5Lp1umJz4+niVLluDi4mLpUISwOU2p2pTqTGFNbLLqUnpdCtH2LiS7+pKaVGcKayElOiFEizRWwpPSnbA0m0x0Qoj211iyk4QnLMUmE50MLxDCMqR0J6yRTSY6qboUwrIk2QlrYpOJTghheQ2V7iTZdWzp6emMGDGiwde88MIL7RRN4yTRCSHaVEPJbrgkPJsliU4I0anUl+wOI6W7jmLZsmX069ePQYMGsXr1agB27NhBbGwskZGRRERE8O233wLw8MMPYzAYiIiIMJX8HnroIQYPHkxYWBj33HMPVVX1Lf7c+mRmFCFEu2koqcl4O3OXzgBiMCqcPNV4cjAqCtnHKgjxdkXdhBlVPF0c0DSysHRRURFRUVHs2LGDXr16kZiYSHl5OatWraJLly7Y2dlx9OhRbrjhBg4dOgScX8OutrbWdI6SkhI8PDwAmDlzJqGhoUyfPr3R+C4lM6P8H+mMIoR1kna7tpd9rILn/t/vZB+raLVz/vLLLwwdOhQvLy9UKhX33nsvAKdPn2bixIkMGDCAUaNGUVBQQHFxcZ3nWL16talEl5aWZprYuT3Y5MwoQgjrlvfKqDoTW8CcdVKyq4dGraJXN8dGX3eViwPuzvZE6NxQN1JSa6m5c+cSFRXF8uXLUalUeHh41Lk4bF5eHs899xw7duzA09OTd999l127drVpbBezyRKdEML6ScmubajVK
qL8urdqkrvmmmvYunUrRUVFKIpCamoqAOXl5fj4+KBSqfjyyy/NFk11cnLizJkzAFRUVNClSxe6d+/O2bNnWbp0aavF1hQdJtGdOHGC++67j+uvv97SoQghWokku46hV69evPbaa9xwww1ER0fj6+sLwJNPPsm8efOIiIhg06ZN+Pn5mY558MEHGTRoECNGjCA8PJxhw4YRHBzMjTfeSExMTLvG3+adUWbNmsXKlSspKioya5hMT08nJSWFqqoqhg0bxoIFC9BoNI2eLz4+nrS0tCZdu6HGSSGE9agvsXXmakxZpqduVtkZZdy4cWzfvt1sn9FoJDk5mRUrVnDw4EEqKipMReGsrCxGjx5t9sjIyGjrMIUQFiQlO9GW2rwzytChQy/bl5GRgbe3t2kF8KlTp6LX60lKSiIsLIy1a9e2dVhCCCsjHVREW7FIG11hYaGpjhfAz8+v0fFuVVVVzJgxg127dpGSklLna/R6PSEhIaZHWVlZq8YthGhbUrITbcEiia45zYIODg588MEH5Obmotfr63xNSkoK2dnZzJs3j5iYGLRaGT0hREcjyc6cDc7p0SLNuR8WSXS+vr5mJbj8/Hx0Op0lQhFCWCFJdqBSqXBwcKCkpASj0YiiKJ3+YTQaKSkpwcHB4Yo66LTbFGAXTwdjMBjo27cva9euJSQkhDvvvJORI0cyZcqUVr2m9LoUomOrK7FNDYdnJnSONruamhoKCgradV5Ia+fg4ICvry92dnZm+xv6vG/zur3p06ezbt06DAYDOp2OsWPHotfrWbhwIYmJiVRVVREXF8ekSZNa7ZoX5rqUhVeF6Njq6qCyaDck/+04Xl5eFoqq/djZ2dG7d2+pvrxIc4ZayKTOQgirV1fJLuPRGDw9PS0QjbBGDZXobDLRXSBVl0LYjrqS3W+PD8Hd3d0C0Qhr0+lWL0hLS2Py5MlSdSmEDbn/uss/rqJe+4ny8nILRCM6EinRCSE6jL/NWUduHfv3PB2Hs7Nzu8cjrEenK9EJIWzTD/UMOxjw0ibpmSjqZZOJTqouhbBd9Y2xu+2fG8wmjhfiAptMdLLCuBC2ra5ktx/Yf+AARqOx/QMSVs0mE50QwvbVlexGfXKYvLw8GXcmzNhkopOqSyE6h7qS3fAP93L06FELRCOslfS6FEJ0eHWNsdvx2LV4eHhYIBphCdLrUghh096a0PuyfYNe/5nTp09bIBphbSTRCSE6vITwq7nt8lzH7S9torq6uv0DElZFEp0Qwia8c/8oLs11B4APv/5OemJ2cjaZ6KQzihCd0/evjLpsSZY3foa8vDxLhCOshHRGEULYnLo6p/w0K6pTLO3TWUlnFCFEp1LXsIOEt3+TCaA7qQ6T6NatW0dycjITJ07kk08+sXQ4Qggr9/n0gWbbx4FpL2+ROTE7oTZPdLNmzUKn06HVmtecp6enExoaSp8+fUhOTsZgMDR4nlGjRrFw4UKWLVvGqlWr2jJkIYQNuDZQR/K15vt+Bd5euUE6p3QybZ7oxo0bx/bt2832GY1GkpOTWbFiBQcPHqSiooLU1FQAsrKyGD16tNkjIyPDdOzLL7/MtGnT2jpsIYQNeDp+FHeHme+bvxvy8/MtE5CwiHbrjKLVak0zi//yyy889thjbN68GYD169ej1+tZs2ZNg+d48cUX6du3L+PHj2/SNaUzihACYPxL6/j5orHjrsDGR2Pw9PS0WEyidVldZ5TCwkJ8fX1N235+fhQUFDR4zEcffcTnn39Oeno6zz33XJ2v0ev1hISEmB5lZWWtGbYQooP6/OlROFy0XQE8O/9Xzpw5Y6mQRDuySKJrTiFy2rRp/P7773zwwQf1JrqUlBSys7OZN28eMTExl7ULCiE6r13P3WK2/b/n4L8b0mUNu07AIonO19fXrASXn5+PTqdrtfPLenRCiEs5Omp55nbzcXSv/gTZOTmyrI+Ns0iii46OprCw0FSfumjRIhISElrt/DIzihCiLlOviWKi+
agDbvtvLkVFRZYJSLSLNk9006dPR6fTYTAY0Ol0pKSkoNFoWLhwIYmJiQQFBeHs7MykSZPaOhQhhOBfd4/i1ksqkB5/+zf5YmzDZAowIUSnNPPDdaw5/Nf2tP7w+D03Y2dnZ7mgRLNZXa/LtiZVl0KIxrxzv/k0YR/th7Sff5b2OhvUaKKbMmUKP/30U3vE0mqkM4oQoin+mzzAbPuxdRX88ccfFopGtJVGE93NN9/M3LlzGTBgAP/+978pKSlpj7haREp0QoimuKGPP0/H9zLb95/PdsjK5Dam0UR399138/3337N69WpOnjxJZGQk48ePJz09vR3Cax4p0Qkhmir52kGkxP01nPzzP+CjdZtkfJ0NaXIb3eHDhzlw4ABdu3YlODiYF198kaSkpLaMTQgh2sVjI0eYbb+9E77ctk3a62xEo1OHvPjiiyxdupTw8HCmT5/OTTfdZHqub9++bRpcc6WlpZGWliZVl0KIJkt/NJZhb241bc/5+hR9vQ4yyEo/50TTNVqiU6vVbN68mS+//NIsycH5yZitkVRdCiGuVICnG1/+PdJs3x2Lcjh79qyFIhKtpdFEd+DAAXr1Mm+snTx5MgC9e/duk6CEEMISov29WTjlarN9i775Qdav6+AaTXS7d+++bN+OHTvaJJjWIr0uhRDNNaJ/b1JG/PXl/s1fIWPvXgtGJFqq3kT37rvv0rdvX/bu3Uu/fv1MDy8vLwYNGtSeMV4xqboUQrTEo8OjzLbvWppHUXGxhaIRLVXvFGDl5eWUlZXxyCOP8O9//9u038XFBXd393YLsCVkCjAhRHNlFhQRr/+r9ipQBeufvwl7e3sLRiXq09Dnvcx1KYQQ9dh2uIAJH/7VfPPObd0ZM2QIKpXKglGJujT0eV/v8IL4+HjS0tLo27ev2S9VURRUKhU5OTmtH2krkeEFQojWcF1vXzydszl5+vzg8ZlrygjoeZjwoCALRyauRL0luuPHj+Pl5cWRI0fqPNDf379NA2sNUqITQrTUn6cqifjXRrN962dF0d/Lq54jhCVI1aUQQrRAYUkFQ1//0Wzfj4/F4uvhZpmAxGWaVXV5aZXlBR2h6lIIIVqTzsOV354aTtS87037rn99KwdfGolWa5OrndmUehPdhg0b2jOORh04cIA333yTqqoqvL29+de//mXpkIQQnYi7axcy595oVo0Z86/1pD86HFdnhwaOFJZW71cRf3//Bh9NNWvWLHQ6HVqteU5NT08nNDSUPn36kJycjMFgaPA8ffv25YMPPmDx4sVSmhRCWISbiyMbHh5i2i49Z+TGV62rUCAuV2+ii4+PB84nmIsHjF/Ybqpx48axfft2s31Go5Hk5GRWrFjBwYMHqaioIDU1FYCsrCxGjx5t9sjIyADgu+++IyEhgcGDB1/p+xRCiFbRp6c7v84ZZto+WQPf7jmK0Wiz3R06vHbrdanVak3rO/3yyy889thjbN68GTg/ObRer2fNmjVNOtfo0aP56quvsLOza/B10hlFCNFW1u0qIOWzv8bYvTcpjNGhfhaMqHNrVmcUr//rOuvv709xcbGpVBUTE4OHh0eLAiosLMTX19e07efnR0FBQYPHbN26leXLl1NTU0NkZGSdSU6v16PX603bZWVlLYpTCCHqMzJMx3saIw+m7gHgwaVZXPtkD3p0c7JwZOJSjXYXWr58OWFhYSxcuJCPPvqI8PBwvvjiixZdtDkjGmJjY3nnnXd4//33efHFF+t8TUpKCtnZ2cybN4+YmJjL2gWFEKK1qNUqRg/w5+7Bf00APeTlH6QK0wo1mgmeffZZMjIy0Ol0ABw9epQbb7yRO++8s9kX9fX1NSvB5efnm84vhBAdyfNjIvhy+zfUKFADfLT1INNi+6BWyzRh1qLREp2zs7NZEvLx8aFr164tumh0dDSFhYWm+tRFixaRkJDQonNeTFYvEEK0F3t7DT/N+Ztp++V1OWzIOWbBiMSl6k1027ZtY9u2bcTFx
XH33Xfzv//7v3zzzTdMnDiRYcOGNfkC06dPR6fTYTAY0Ol0pKSkoNFoWLhwIYmJiQQFBeHs7MykSZNa4/0Ash6dEKJ99ejmxC9zrjdtP/BJJtXVDQ+ZEu2n3l6Xf/vb3+raff4glYrvv/++3uethfS6FEK0p3HzN5GRfxqAKdf68M/4CMsG1Il0urkuL6xesHHjxkZ7cwohRGs5e7aGkBe+BcDDAXY8P8rCEXUezRpecLGcnBz27NlDZWWlad+ECRNaJ7o2EB8fT3x8PCEhIZYORQjRiTg52RHo0YXcknOUV8Ofp87i5iLDDSyt0c4ob775Jvfccw/3338/q1at4u9//zvLly9vj9iaTdrohBCW8sKYAQDUKjD67XQZbmAFGk10S5YsYevWrfj4+PDll1+SmZmJ0Whsj9iaTXpdCiEsJbafJ+E+rgAUnlZ4ZtV2amut+zPT1jWa6BwdHbGzs0OlUlFbW0tgYCB5eXntEFrzSYlOCGEparWKz6dei6fz+dmblm0/wZKtBywcVefWaKLr3r07FRUVDB8+nHHjxjFt2jTc3d3bI7ZmkxKdEMKSnJzs+PEfw+n2f6v3LP85D4NBSnWW0miiW7NmDc7Ozrz++uvcfvvthIeHs3r16vaITQghOixHRy13RgcAcKislgeW/ixj6yykSVWXpaWlfPvtt3h6ejJhwgTc3NzaIbTmk6pLIYQ1eOyWYOL6emAE1u8r4x8rfpPOKRZgkUmd25pUXQohrIG9vYZFSTGMDrsKgHV7TvBz7kkLR9X5NDpgvH///mzcuPGySZ337dvXLgG2hMyMIoSwBrW1Rm57N53sP84R5ePE3DEDifTrLhM/t6KGPu8tMqmzEEJ0JlqtmmdHh+Oggb1FZ0n6+GdeW79Phh20k3pnRtm2bRuAaVLne++9F5VKxdKlS69oUmchhBAQE+TBJ1Oi+WFnDp/srOCDTYcxKgpzbr1aSnZtrN5EN3fuXLPt1157zfTz8ePH2y6iVnBhrkvpjCKEsBZqtYpr+/QkyE1D0andrM45R+rP+YwI6UlMgIelw7NpNjmp8wXSRieEsEb7Dxzgif93iMwTBu6/PoA5I0OkVNdCLWqjMxqNLFiwgPHjxzN+/Hg++ugjq58CTAghrFlQYCBD/JwB+PSXfD7PyJdhB22o0dULZs+eTUFBAZMnT0alUvHJJ5+QlZXFO++80x7xCSGEzdFqtUwdHkLRqV2sP3yO19bvp18vF6L9rXvWqY6q0US3adMmdu3aZdoePXo0ERERbRlTvQwGA2PGjOHmm29m9uzZFolBCCFaQw93dx4aFsBVXQtYse8c3/1ehEpBhh20gSZVXVZUVJi2T58+zZU0682aNQudTodWa55T09PTCQ0NpU+fPiQnJ2MwND41zr///W9uu+22Jl9bCCGsmb+fH/8T7MbdA1xYkVHA5MW/sj2/1NJh2ZxGE90DDzxAdHQ0TzzxBE888QSDBw/mwQcfbPIFxo0bx/bt2832GY1GkpOTWbFiBQcPHqSiooLU1FQAsrKyGD16tNkjIyODjIwMHB0dCQ4OvsK3KIQQ1kmj0RAYEMAwP3tu7ufK6SoDP+w9Ie11razBXpeKonDs2DGKi4vZtGkTKpWKG264gYEDB17xhbRaLbW1tQD88ssvPPbYY2zevBmA9evXo9frWbNmTb3H/+tf/+LEiRMcPXqUkydP8umnn+Lj49PgNaXXpRCiI8jPz+f4H3+w8pCK7w+U8cG9g6S97go19HnfYBudSqXi1ltvJSsrq1nJrT6FhYX4+vqatv38/CgoKGjwmAvj+tLT08nMzKwzyen1evR6vWm7rKyslSIWQoi2o9PpqKioIKrHWTbkKPy4/4S017WiRqsug4ODycnJadWLtmTo3rBhw+rtiJKSkkJ2djbz5s0jJibmsnZBIYSwRmq1msDAQILc1CRFufPprwVMWZLBbwXyZb01NJoJjh49ysCBA4mKijKb4/Lbb79t9kV9fX3NS
nD5+flm82kKIURn07VrV7y9vLiO43QZ4s0HW49yoOgUUb5SqmupRhPdyy+/3OoXjY6OprCwkOzsbEJCQli0aBEJCQmtdv74+Hji4+MJCQlptXMKIURb8/b2pry8nEHqSqYN9eet73LoK+PrWqzRqsu4uDhiY2O56qqruOqqq4iNjSUuLq7JF5g+fTo6nQ6DwYBOpyMlJQWNRsPChQtJTEwkKCgIZ2dnJk2a1KI3cjFZeFUI0RGpVCoCAwNRAVdpz1FVa0SRHpgt1miJLj09nXvvvRdPT08URaG0tJRPPvmkycluwYIFde4fPny49IgUQohLdOnSBW9vb/buOoxGDWqVVFu2VKOTOg8YMIDU1FTTbCi7du1i4sSJ7Nmzpz3iaxEZXiCE6IgURWHvvn38vz0nuWFgP67p01Pa6RrRokmd7ezszKb8GjhwIHZ2dq0WXFuQqkshREemUqkI6t0bVCoe+jyT32S2lBZpNNHdcsstzJkzh71797Jv3z6eeuopRo4cybFjxzh27Fh7xHjF4uPjWbJkCS4uLpYORQghmsXBwQG/Xp5UGxSKS0osHU6H1mjVZWBgYP0Hq1QcPny41YNqLVJ1KYToyIxGhbQtuzl99ixRIX2oRkuEzk2qMevQ7JlRAHJzc1s9oLYmK4wLIWyBWq1i9LVXk52dza9781iwo5z5E6IYFCDDDa6ErDAuhBBWrqSkhMO5uZwwOOPS3R2jAjf09ZSS3UVa1BlFCCGEZXl4eODevTu9tGfQGGt5clUWmYV/WjqsDsMmJ4OUqkshhK3x9/fn9OnTuNSU8ODfeqMYFYxGRUp1TWCTJTrpdSmEsDVarRZ/f39qqqv5s+xPHl2xS0p1TVRvia5v376o6hiRrygKKpWq1Vc0EEII0TA3Nzd69OiB76nj/GN4ABE6N0uH1CHUm+g2bNjQnnEIIYRoggtr12WfOInR6ItabZMtUK2q3jvk7+/fnnEIIYRoAo1GQ2BgILuP7WbNT9nEx4ZJO10j6k108fHxpKWlXVaF2RGqLqUzihDCljk7O+Ph7s5T63JxcXHhpoj6J/YQDYyjO378OF5eXhw5cqTOAztCiU/G0QkhbFVtrYHVWzIpP1fNXcMi6drF0dIhWVSzZkbx8vICOkZCE0KIzkar1fA/14Twe3Y2q3/ey7jrw7HTaiwdllVqdHhBZmYm1113Ha6urtjb25se7S09PZ3Y2FhmzJjBihUr2v36Qghhbbp06YLOx4e+LgbW7jjI9rxSjLJQ62UaTXR///vf0ev1BAUFUVpayrx583j++eebfIFZs2ah0+nQas0Lj+np6YSGhtKnTx+Sk5MxGAwNnkelUuHs7MzZs2fp3bt3k68vhBC2rGfPnri6uHD2z1IeWLaDnw7LSgeXqjfRbd26FYDq6moiIyOpra3F2dmZf/zjH6xcubLJFxg3bhzbt28322c0GklOTmbFihUcPHiQiooKUlNTAcjKymL06NFmj4yMDK6//nrWr1/P+++/z7PPPtuc9yqEEDZHpVIREBBAsKcDc4Z6YK9RcfTPc5YOy6rU20b30EMP8dtvv5mqKf39/Vm+fDk6nY7y8vImX2Do0KGX7cvIyMDb25uQkBAApk6dil6vJykpibCwMNauXVvv+bp27YoNz0MthBBXzMHBAX8/PzhyhF72lZyrdmDv8Qqu9nK1dGhWodGRhv/85z8pLy/njTfe4IEHHqCiooJ33323RRctLCzE19fXtO3n50dBQUGDx6xatYpvvvmG06dPk5SUVOdr9Ho9er3etF1WVtaiOIUQoqPo0aMHf/75J0VFRfTv342zij2/HC4hJtC9zlmuOpN6hxf06NGDhIQEs30XXqpSqfjwww+v6EJarZba2loAvvzyS7766iuWLVsGwN69e5kwYQI7d+684jdQlwvj6DZu3NhoAhVCCFtRU1NDdnY2Go2Gq6++mhojZOSVMjjAHUc72+6R2azhBV27diU2NrZNAvL19TVLQPn5+
eh0ulY7f3x8PPHx8aaqUSGE6Azs7Ozw9/fn0KFDFBYW4u/vz9A+PcjIK8Pfw4merp1zrF29ic7Dw6PeKsKWio6OprCwkOzsbEJCQli0aNFlpceWkJlRhBCdlZubGx4eHhQXF+Pm5ka3bt2ICXRnX1EFFedq6Nuz863qUm+vy9bq8DF9+nR0Oh0GgwGdTkdKSgoajYaFCxeSmJhIUFAQzs7OTJo0qVWuJ4QQnZ2vry/29vYcOXLE1GQU3MuVrg5ath8p5bf8sk413q7eNrrTp0/j7Ozc3vG0KpkCTAjRWZ06dYqcnBy6d+9uNvZ4R14pj67YxVt3RRDl192CEbauhj7v6y3RdeQkl5aWxuTJk6XqUgjRabm4uNCzZ0/KysooKflrEHmkX3feuiuiU61lV2+JzhZIiU4I0ZkZjUb27dtHdXU1ISEhFpm+sb00q0TXkUmJTgghQK1WExAQgNFoJC8vr9NOtmGTiS4+Pp4lS5bg4tL5ehcJIcTFnJyc8Pb25tSpU5w4ccLS4ViETSY6IYQQf+nZsyfOzs4cPXqUs2fPWjqcdmeTiU6qLoUQ4i8qlYrAwEDUajW5ubkYjUZLh9SubDLRSdWlEEKYs7e3x8/Pj8rKSo4ePWrpcNqVTSY6IYQQl3N3d8fd3Z0TJ05c0So0HZ1NJjqpuhRCiLr5+flhb29PXl4eNTU1lg6nXdhkopOqSyGEqJtGoyEwMJDa2lqOHDli6XDahU0mOiGEEPVzdnbGy8uL8vJyTp48aelw2pwkOiGE6IS8vLzo2rUrhYWFVFZWWjqcNmWTiU7a6IQQomEqlYqAgAAAcnNzbXrWFJtMdNJGJ4QQjXN0dMTX15ezZ89y7NgxS4fTZmwy0QkhhGiaHj164ObmRlFRkc3WgkmiE0KITs7f3x87Ozvy8vJMC7Xakg6T6BRF4Z///CczZ87k7bfftnQ4QghhM7RaLQEBAVRXV5Ofn2/pcFpdmye6WbNmodPp0Gq1ZvvT09MJDQ2lT58+JCcnYzAYGjzPmjVryM3NxcHBAW9v77YMWQghOh1XV1euuuqqyxZqtQVtnujGjRvH9u3bzfYZjUaSk5NZsWIFBw8epKKigtTUVACysrIYPXq02SMjI4O9e/cyaNAgXn/9dT7//HPOnTvX1qELIUSn4uPjQ5cuXcjPz6eqqsrS4bQabeMvaZmhQ4deti8jIwNvb29CQkIAmDp1Knq9nqSkJMLCwli7du1lx+Tk5JhKfV27dqW6upouXbq0bfBCCNGJqNVqAgMD2bt3L7m5ufTv3x+VSmXpsFrMIm10hYWF+Pr6mrb9/PwoKCho8JiEhAQ2bdrE7NmzCQoKolu3bpe9Rq/XExISYnqUlZW1euxCCGHLunTpgq+vL2fOnLGZIQdtXqKrS3MGJnbp0oVFixY1+JqUlBRSUlJIS0sjLS2NjRs3NjdEIYTotDw9PamoqKCoqAhXV9cOPybZIiU6X19fsxJcfn4+Op3OEqEIIYSow4UhB7m5uR1+yIFFEl10dDSFhYVkZ2cDsGjRIhISElrt/DIzihBCtIxWqyUwMJCamhry8vIsHU6LtHmimz59OjqdDoPBgE6nIyUlBY1Gw8KFC0lMTCQoKAhnZ2cmTZrUateUuS6FEKLlXFxcTKscnDhxwtLhNJtKseGZPENCQkylRiGEEFdOURRycnI4c+YMwcHBODk5WTqkOjX0ed9hZka5ElKiE0KI1qFSqQgMDEStVpObm4vRaLR0SFfMJhOdtNEJIUTrsbe3x9/fn8rKyg45RZhFhhe0tQvDC6REJ4QQraN79+706NGD4uJiXF1dcXd3t3RITSYlOiGEEE3i6+uLo6Njh5sizCYTnbTRCSFE61Or1fTu3Ruj0dihViW3yUQnJTohhGgbHXGKMJtMdEIIIdqOp6enaVXyiooKS4fTKJtMdFJ1KYQQbcvf3x97e3tyc3OpqamxdDgNsslEJ1WXQgjRti5MEVZbW2v17XU2meiEEEK0P
WdnZ3x8fDh16hRFRUWWDqdekuiEEEI0W8+ePXF1deXYsWNW21wkiU4IIUSzXZgi7MKSPtbYXmeTiU46owghRPvRarX07t2bmpoaq2yvs8lEJ51RhBCifVlze51NJjohhBDtz1rb6yTRCSGEaBXW2l7XYRLd1q1bmTFjBjNmzCAwMNDS4QghhKjDhfF1NTU15OXlWUV7XZsnulmzZqHT6dBqzVcESk9PJzQ0lD59+pCcnIzBYGjwPLGxsXzwwQfcc8893HvvvW0ZshBCiBZwcXHB29ubiooKq2iva/NEN27cOLZv3262z2g0kpyczIoVKzh48CAVFRWkpqYCkJWVxejRo80eGRkZpmM//PBDpk2b1tZhCyGEaIFevXpZTXtdmy+8OnTo0Mv2ZWRk4O3tTUhICABTp05Fr9eTlJREWFgYa9eurfNcJ0+epKqqCp1O16YxCyGEaBmVSkVAQAB79+4lNzeXkJCQy2r22otF2ugKCwvx9fU1bfv5+VFQUNDocR9//DFTpkyp93m9Xk9ISIjpUVZW1irxCiGEuHJ2dnam9jpLjq+zSKJr7pt94oknuPXWW+t9PiUlhezsbObNm0dMTIzFvj0IIYQ4z8XFBR8fHyoqKjh+/LhFYrBIovP19TUrweXn50t1pBBC2KiePXvSrVs3jh8/Tnl5ebtf3yKJLjo6msLCQrKzswFYtGgRCQkJrXZ+mRlFCCGsx4X2Ont7e/Ly8qiurm7X67d5ops+fTo6nQ6DwYBOpyMlJQWNRsPChQtJTEwkKCgIZ2dnJk2a1GrXlLkuhRDCumi1WoKCgjAYDBw+fLhd2+vavBFrwYIFde4fPny4qUQnhBDC9jk5OeHr60t+fv5lnRLbUoeZGeVKSNWlEEJYJ09PT9zd3Tlx4gSlpaXtck2b7JaYlpZGWlqaVF0KIYQV8vf359y5cxw5cgQnJyccHR3b9HpSohNCCNGu1Go1vXv3BuDQoUONTgHZ4uu16dmFEEKIOjg6OhIQEEBlZSXHjh1r02tJ1aUQQgiL6N69O/7+/ri5ubXpdVSKNayh0EZCQkKkZ6cQQnQCDX3eS9WlEEIImyZVl0IIIWyaVF0KIYTo8KTqUgghRKcliU4IIYRNk0QnhBDCpklnFCGEEDbNpjujuLq6drgFXcvKyujevbulw+hQ5J41j9y3Kyf3rHna474VFhZSUVFR53M2neg6IukpeuXknjWP3LcrJ/eseSx936SNTgghhE2TRCeEEMKmSaKzMikpKZYOocORe9Y8ct+unNyz5rH0fZM2OiGEEDZNSnRCCCFsmiQ6K1BQUMCNN97I1VdfTWhoKE8++aSlQ+pQUlJS0Gptckhomzhz5gxJSUn079+f4OBgFixYYOmQOoTU1FTCw8OJiIjg+uuvZ//+/ZYOyerMmjULnU532f/HOXPm0KdPH/r168fKlSvbPS5JdFZAq9Xy6quvsnfvXnbu3MmWLVtYvXq1pcPqEH788UdOnz5t6TA6lEcffZTQ0FD279/P3r17uf322y0dktU7e/Yss2bN4vvvvyczM5OJEyfy9NNPWzosqzNu3Di2b99utm/Dhg1s27aN/fv388MPP/Dwww+3+/9ZSXRWwMvLi+joaADs7e2JjIwkPz/fwlFZv6qqKubMmcMbb7xh6VA6jFOnTrFmzRoeeeQRAFQqFVdddZWFo7J+RqMRRVFMH9Dl5eV4eXlZOCrrM3ToUHr16mW2b+XKlUyePBmNRoOPjw+xsbF8++237RqX1PdYmdLSUtLS0tr9D6EjeuGFF5g6dSqenp6WDqXDOHz4MD179uTBBx/k119/xd/fn//85z/4+/tbOjSr5uzszHvvvceAAQPo1q0b3bp146effrJ0WB1CYWEhCQkJpm0/Pz8KCgraNQYp0VmR6upqEhMTmTVrFsHBwZYOx6rt3r2bX375hSlTplg6lA6ltraWzMxMEhMT+e233xgzZgz33XefpcOyejU1NcyfP5+MjAyOHj1KY
mIiTzzxhKXD6hCsoWO/JDorYTAYmDBhAhERETz66KOWDsfqbd26lezsbAIDAwkICMBgMBAQEFDvXHfiPJ1Oh4eHByNGjABg/Pjx7Nixw8JRWb/MzEwUReHqq68Gzt+3bdu2WTiqjsHX19esBJefn9/ucxBLorMS999/Py4uLrz55puWDqVD+Pvf/86xY8fIy8sjLy8PjUZDXl4erq6ulg7NqvXs2ZPQ0FB+++03AL777jtCQ0MtHJX10+l07N+/n6NHjwLn71tISIiFo+oYEhISWLJkCQaDgaNHj7JlyxZuvvnmdo1B2uiswNatW/n4448ZMGAAkZGRANx3333MnDnTwpEJW/T+++8zdepUzpw5g5ubGwsXLrR0SFbPy8uLV155hZtuugk7Ozs8PT35+OOPLR2W1Zk+fTrr1q3DYDCg0+kYO3Yser2e7777jn79+qFWq3nrrbdwcXFp17hkZhQhhBA2TaouhRBC2DRJdEIIIWyaJDohhBA2TRKdEEIImyaJTgghhE2TRCeEEMKmSaITdbK1ZW/S0tLYvXt3s469+eabTQOFAwICKCwsbHYcr7zyCkFBQahUqsvO8/bbb9O3b1/69OmDXq9v9jWaa/LkyaSmprb7dTu7V155hf/+97+WDsOmSaITFlNbW9tu52puotu4cSM+Pj74+Pg0NzQzN954I99///1lkygfOHCA+fPns3PnTnbu3Ml//vMfcnNzW+WadWnNe9+RY2hIe8WXkpLC66+/bhVzQtoqSXSiUVqtlhdeeIGIiAjCw8PJyclBURQCAwNNJR2AUaNGmVZdePfdd4mJiWHgwIEkJydTU1MDnC8RzZkzh+joaN59912++uor02KW4eHhHDlyBID09HSGDh3KoEGDuPnmm+uc7XzJkiWMHj2aW265hQEDBgCQmJhIdHQ0AwYM4OGHHwbghx9+YM2aNcydO5eIiAi2bdvGuXPnmDFjBjExMYSFhfHee+/V+d4/+eQTEhMT63xuy5YtREdHEx4ezqhRoygqKgLOr0DxP//zP4SGhnLHHXdwzTXXsGXLFgAGDx5c50oBq1at4q677sLZ2RkXFxcSExP56quvzF5jNBrx9vamqqoKo9GIu7u7qeT33HPPmd7DV199xcCBAwkLC2PChAmcOnUKOF9imz59OkOGDCEpKYnKykomTZpEcHAwt9xyCydPnjRd69lnnyU0NJTw8HBuuummOt+/VqvlqaeeIiwsjKioKLKyskxxzp07l5iYGMLDw3nqqafMjnn66aeJjIxk1apVZufbsWMHsbGxREZGEhERYbaCR33XWrJkCWPGjGHEiBH079+fpKQkqqurAThy5AijR48mOjqa6OhoNm3a1KTrXBzf4sWLiYmJITIykmHDhpm+fKSnpxMbG8uECRMICQlh5MiRVFZWAlBcXMxdd91FeHg44eHhfPbZZ8D5iciHDx/OoEGDGDp0qOk9uLi4EBQUxI8//ljnfRatQBGiDhqNxvQzoHz22WeKoijK66+/riQnJyuKoihPPPGE8sYbbyiKoijFxcWKTqdTamtrlY0bNyr33HOPYjAYFEVRlJSUFGX+/PmKoiiKv7+/8swzz5jOHRYWphw7dkxRFEU5e/ascu7cOaWkpEQZMmSI8ueffyqKoihffPGFcuedd14W4+LFi5UePXooRUVFpn3FxcWKoiiKwWBQxo4dq3zzzTeKoihKUlKSsnTpUtPrnnnmGWXBggWKoihKZWWlMmjQIOX333+/7BoBAQFm5/f391cKCgqUyspKRafTKTt27FAURVHeeOMNU4wzZ85UnnrqKUVRFCUzM1PRaDTKjz/+aHbeC+e54MEHH1Q+/PBD0/b8+fOV2bNnXxbPbbfdpmzatEnJzMxUYmJilHHjximKoihxcXFKVlaWcvz4caVXr17KkSNHTOd9/PHHTfdg+PDhSnV1taIoivLWW28pd999t2I0GpX8/HzF1dVVWbp0qVJSUqJcffXVpt9faWnpZ
XEoyvm/C71eryiKoqSlpSmDBg1SFEVRFi1aZHr/BoNBGTNmjPL111+bjlm0aFGd5ysvLzfFVlhYqPTu3bvRay1evFhxc3NTCgsLFaPRqCQkJCjvvvuuoiiKMnz4cGXPnj2KoijKkSNHlMDAQMVoNDZ6nYvju/D3pCiKsnLlSmX8+PGKoijKDz/8oDg5OSmHDh1SFEVRRo0apaSmpiqKoigTJkxQXnjhBdNxJSUlSnV1tXLNNdcohYWFiqIoyq+//qrExMSYXvPSSy8pL730Up33RbScbTXEiDahUqm44447gPMlkm+++QaAiRMnkpSUxKOPPsoXX3xBQkICGo2Gr7/+ms2bNxMVFQVAZWUlXbp0MZ1v4sSJpp+HDRvGPffcQ3x8PGPHjsXPz48NGzawf/9+4uLigPMlhPrmxhsxYgQ9e/Y0bX/44Yd88cUXGAwGTpw4wdChQ7nlllsuO+7rr7/m3LlzzJ8/H4CKigr2799/2US9x48fr3O9u3379tGrVy/Te5w6dSqvvvoqAJs3b2bZsmUAppJVY5QmVlvFxcWxadMmunXrxrRp09Dr9VRWVnLw4EFCQ0NZs2YNQ4cOxc/PzxTX/fffbzr+zjvvxM7OzhTntGnTUKlU+Pr6Mnz4cAC6detG165dmTx5MrfccgtjxoypN56kpCQAxo4dy5QpUzhz5gxff/01u3btYt26dQCcOXOGAwcOMHLkSAAmTJhQ57lOnz5NcnIy2dnZaLVaCgoKKC4upkePHvVeC87/DVyoWr733ntJTU1l8uTJbNmyxexvrbq6mhMnTmAwGBq8zsXx7d+/n7lz51JcXIzBYECt/qsSLCoqit69ewPn/19cKO2tX7+e999/3/Q6d3d39uzZw++//86oUaNM+0tLS00/X3XVVWRmZtZ7n0XLSKITjVKr1aYPR41GY2q7CAsLo7a2ln379vHpp5+aVl5QFIWHH36Y2bNn13m+rl27mn5+55132LlzJ9999x1xcXGkpqaiKArXX389aWlpjcZ28bk2bdrEypUr2bx5My4uLjz66KOm6qRLKYrCsmXLiIiIaPD8jo6OVFVVmSVqOJ/8G9pu6nMXNHUpk2HDhvH444/TrVs3Xn31VTZu3MiSJUuIiYlBpVI1GtfF96s+Go2Gbdu2sXnzZtavX8/TTz9NZmYm3bp1a/RYOH9vX3/9deLj4+s8t6OjY53HzZ07l6ioKJYvX45KpcLDw6Pe39/F6rq/RqMRJyenOpPHlClT6r3OpfFNnDiRTz/9lCFDhpCVlcXtt99ues7BwcHsfV3cpndpTIqiEBQUVG8yu/TLoGhd0kYnWmTChAm8/PLLnDhxgpiYGABGjhzJ4sWL+fPPPwEoKyurt2NFTk4OkZGRPP7449x0001kZmYyZMgQfv31V/bs2QOcX/TyQntGQ8rLy3Fzc8PFxYWSkhJWrlxpes7FxcVsrbqRI0fy9ttvYzAYgPOdQepayy40NJQDBw5ctr9///4UFRWZPrg+/vhjU4nohhtuMLXLZGVlNakTzO23387y5cs5ffo0p06d4ssvvzT7UL0gIiKC7OxsDh06RJ8+fRg2bBgvv/wyw4YNAyAmJoatW7eaenQuXrzYFNel4uLi+PTTTwE4evQoP/zwAwCnTp2ipKSEG2+8kVdeeQVHR8d6e5ouXboUgLVr19K7d2+6du3KyJEjef/9903J49ixY6b2y4aUl5fj4+ODSqXiyy+/NCvx1HctgA0bNnD8+HEURSE1NZW4uDhcXV0JDQ01W2HgwtJEjV3nYhUVFabS4ocfftjoewC45ZZbePvtt03bpaWlBAcHc+rUKTZu3AicT3w7d+40vSYnJ8fUzixanyQ60SITJkxg6dKljB8/3rRvxIgRzJgxgxtuuIHw8HBuvPHGej8oH3/8cQYMGEBERAR//PEH99xzDz169OCzzz4jOTmZgQMHEhERwebNmxuN5dZbb8XZ2Zn+/fuTmJjIDTfcYBbne++9Z+qM8
vTTT+Ps7MzAgQMZMGAA06ZNM3ViuNhtt91m+nC6mIODA59++inJycmEh4fz3Xff8Z///Ac435EjIyOD0NBQnn/+eQYMGGAqDb300kvodDoKCwsZPHgwt912GwD9+vVjxowZREREEBERwcyZM03VYhdTq9VERUWZPhTj4uLIz883VfP26tWL9957j1GjRhEWFsaJEyeYO3dunfdrxowZqFQqgoODue+++4iNjQXOJ4KxY8eaOlOMHTu2zjXrNBoNBQUFhIeH8+yzz5qSytSpUxkyZAjR0dGEhYWRkJBg+tLTkCeffJJ58+YRERHBpk2bTNWvDV0LIDY21tSppkuXLkybNg2AZcuWmTrmhISEmDrrNHSdS7322mvExcUxaNAgunfv3uh7gPPDRHbv3s2AAQMYOHAg3333HXZ2dqSlpfHSSy8xcOBAQkNDzb6Ibd68mVtvvbVJ5xdXTpbpEaIBJ0+eZOzYsWzdurVJVZBwvi1IpVJhZ2fHgQMHGD58ODk5OTZXNaXVatutC35911qyZAlbtmzp0Gvq/fTTT8yfP99UYhWtT9rohGiAp6cnTzzxBEVFRXh5eTXpmOPHj3P77bdjMBhQFIUPPvjA5pKcaD0lJSXMmzfP0mHYNCnRCSGEsGnSRieEEMKmSaITQghh0yTRCSGEsGmS6IQQQtg0SXRCCCFsmiQ6IYQQNu3/A8l63zKTehgMAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 391;\n", + " var nbb_unformatted_code = \"surv_model.plot(color=\\\"gray\\\", alpha=0.4, label=\\\"model\\\")\\nsurv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(\\n xlabel=\\\"Inverse rate (log10 words per appearance)\\\",\\n ylabel=\\\"Tail probability\\\",\\n yscale=\\\"log\\\",\\n)\";\n", + " var nbb_formatted_code = \"surv_model.plot(color=\\\"gray\\\", alpha=0.4, label=\\\"model\\\")\\nsurv.plot(marker=\\\".\\\", ms=1, lw=0.2, label=\\\"data\\\")\\ndecorate(\\n xlabel=\\\"Inverse rate (log10 words per appearance)\\\",\\n ylabel=\\\"Tail probability\\\",\\n yscale=\\\"log\\\",\\n)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "surv_model.plot(color=\"gray\", alpha=0.4, label=\"model\")\n", + "surv.plot(marker=\".\", ms=1, lw=0.2, label=\"data\")\n", + "decorate(\n", + " xlabel=\"Inverse rate (log10 words per appearance)\",\n", + " ylabel=\"Tail probability\",\n", + " yscale=\"log\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make a Prior" + ] + }, + { + "cell_type": "code", + "execution_count": 394, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 394, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAZcAAADqCAYAAABurQimAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAAv3ElEQVR4nO3dfVyU953v/9cw3AgOIHKjwAwaGVBnvEGXk5s1AU2bHI+GNlJi86sPqxaapNJN95d1bfZHzp6u57HNrna7SRrSR/cHOVp9bE617CZ4wpqaDRBzZ2iUYMBwYyQwgIIIjKCMzM35A2cqIgw3A9cMfJ6PxzTOXN/rms81hXlzfb/f67pUDofDgRBCCOFBfkoXIIQQYuaRcBFCCOFxEi5CCCE8TsJFCCGEx0m4CCGE8DgJFyGEEB7nr3QBnhQWFoZWq1W6DCGEmBVMJhNms/muy2ZUuGi1WmpqapQuQwghZgWDwTDiMukWE0II4XESLkIIITxOwkUIIYTHSbgIIYTwOAkXIYQQHifhIoQQwuNm1FRkIcRQTZ3XOVHdxryQQDJWxREcqFa6JDFLSLgIMUO9W3OZH79xhv4BOwCFpy7yrz+8j0hNkMKVidlgTN1iZWVlGI1G9Ho9OTk52Gy2YW2OHj1KcnIyiYmJ5OXluV43m81kZGSQlJREamoq58+fB6Cvr497772XlJQUjEYjTz31FFarFYCDBw8SGRlJSkoKKSkp5ObmemJfhZg1Gtp7+Ys3zhI3L5j/8xcPciBrFQ0dveT+6xnsdrk/oJh6bsPFbreTk5PDsWPHaGhowGw2c+TIkSFtenp62LNnD+Xl5dTW1lJaWkp5eTkA+/fvZ+XKldTX17Nv3z52794NQHBwMO+99x6VlZWcO3eOK1euDNnuli1bqKyspLKykvz8fE/usxAz3t8drwag4PuprIgP54lUHXseXconX12l6IxJ4erEbOA2XCoqKoiLi3Od5p+dnU1RUdGQNidOnGD9+vXExsbi7+/Pjh07XG2KiorIyckBYNOmTdTV1dHZ2Ymfnx8ajQYAq9WKxWJBpVJ5dOeEmI0+arjCqforZD94D0uiNa7Xcx66h8Toubz0bj1Wm13BCsVs4DZcTCYTOp3O9TwhIYHm5uYxt7lzmVarxWT6019O9913H9HR0YSFhbFt2zbX68ePH2f16tU8+uijVFRU3LW2/Px8DAaD69HV1eVud4SY8Qo/uIgmyJ+n0pcMeT1A7ceP1utp6b5ByReXFKpOzBZuw8XhcN8/O5Y2Izl9+jQtLS1cvXqVsrIyAB577DEaGxv5/PPPycvLY8uWLfT29g5bNzc3l5qaGtcjIiJiwnUIMRO0dN+gtLadLWviCZsTMGz5t1bHERMaxOGPG6e/ODGruA0XnU435Eilqalp2GXtR2uj1WqHLDOZTMTHxw9ZX6PR8K1vfYvjx48DEBUVRXBwMADp6elotVpqa2vHu29CzDpHK5qxO+B79yXcdXmgvx+Pr4mnorGL5qvXp7k6MZu4DZfU1FRMJpPrUvaFhYVkZmYOabNx40ZKS0tpa2vDarVy6NAhV5vMzEwKCgoAKCkpQa/XExUVRXt7O93d3QBYLBZKSkowGo0AtLa2urZdXV1NY2MjS5YMPcQXQgxXcq4NQ2wYy2PDRmzz7ZQ4AIo/bx2xjRCT5fY8F7VaTUFBAVlZWVgsFtLT09m+fTvFxcUUFxdTUFBAeHg4Bw4cIC0tDbvdztatW9mwYQMAe/fuZdu2bSQlJREaGsrhw4eBwQDZtWsXNpsNm83Gpk2bXAP/r776Km+99RYBAQEEBARw8OBB6fISwo2G9l7q23t57pHkUdsZYsNIitFw/PNWcjfop6k6MduoHJMZMPEyBoNBbhYmZq380gYOvFPLO3+ZxtKFoaO2PfDOl+SXXuDD5x8mfl7wNFUoZprRvnPl2mJCzBB/qL7EPVFzSV6gcdv24WUxAJTVtk91WWKWknARYgbovn6TqpYeNiy
NGdP5Yim6COaFBFD6pYSLmBoSLkLMAJ981YnDAev0kWNqr/ZTkZ4czYcNnfQPDL+ckxCTJeEixAzwYUMnaj8V994zf8zrPJQUzY0BG583d09dYWLWknARYgb48MIVVmvDCb3LiZMjue9WEH168epUlSVmMQkXIXzcpZ5+vuroY50+alzraSOCiQufw6eNEi7C8yRchPBxpy92AvBA4tjGW5xUqsFutM++7mJALmQpPEzCRQgfd+brLtR+KlJ088a97n1LIrl+08YXLT2eL0zMahIuQvi4M03dLFsYSkjg+G8se6+Mu4gpIuEihA+7cdPG+TYzaxLmTWj9JVFziQgJoFJmjAkPk3ARwoeda+nBanewNmFi195TqVSs1s2T6cjC4yRchPBhZ5oGb5A30XABWK2dR2tPP+3mfk+VJYSEixC+7MzXXcyfG8iiyJAJb8M5EeBzkwzqC8+RcBHCh1U2d5Oimzem64mNZJU2HEC6xoRHSbgI4aPar/XTfs3CyvjwSW0nUhOEbn4wn5u6PVOYEEi4COGzqlvNABjjRr7r5Fit1g4O6tvtM+b2TkJhEi5C+KjqWyc+rpjkkQsMdo2Z+618ffX6pLclBIwxXMrKyjAajej1enJycrDZhl+i++jRoyQnJ5OYmEheXp7rdbPZTEZGBklJSaSmpnL+/HkA+vr6uPfee0lJScFoNPLUU09htVoBsFqt7Nq1C71ej9Fo5NSpU57YVyFmlOpWMxEhAcSGz5n0toxxgwF1vs086W0JAWMIF7vdTk5ODseOHaOhoQGz2cyRI0eGtOnp6WHPnj2Ul5dTW1tLaWkp5eXlAOzfv5+VK1dSX1/Pvn372L17NwDBwcG89957VFZWcu7cOa5cueLa7sGDB7FYLDQ0NPDGG2+QnZ3NDLobsxAe8UVrDyviwyc1mO+0PHawa62mVcJFeIbbcKmoqCAuLg6DwQBAdnY2RUVFQ9qcOHGC9evXExsbi7+/Pzt27HC1KSoqIicnB4BNmzZRV1dHZ2cnfn5+aDSDt2O1Wq1YLBbXL8nt66xatYrIyEjOnj3roV0Wwvf1XB+g+eoNDB4YbwGYPzeQhWFz5MhFeIzbcDGZTOh0OtfzhIQEmpubx9zmzmVarRaTyeR6ft999xEdHU1YWBjbtm0b83sC5OfnYzAYXI+uri63OyzETFDddmu8JW7y4y1OhrgwaiRchIe4DZexdEdNpsvq9OnTtLS0cPXqVcrKysa1vdzcXGpqalyPiIiJn6UshC+p8eBMMSdDbBhtPf1c7bvpsW2K2cttuOh0uiFHDU1NTWi12jG30Wq1Q5aZTCbi4+OHrK/RaPjWt77F8ePHx/yeQsxmX7T0MDdQzeLIuR7bprOLTbrGhCe4DZfU1FRMJhM1NTUAFBYWkpmZOaTNxo0bKS0tpa2tDavVyqFDh1xtMjMzKSgoAKCkpAS9Xk9UVBTt7e10d3cDYLFYKCkpwWg0DlunqqqKjo4O1q5d65k9FmIG+PLSNZbHhuHnN/nBfCcZ1Bee5PYGEGq1moKCArKysrBYLKSnp7N9+3aKi4spLi6moKCA8PBwDhw4QFpaGna7na1bt7JhwwYA9u7dy7Zt20hKSiI0NJTDhw8D0Nrayq5du7DZbNhsNjZt2uQaxN+5cycffPABer2ewMBACgsLPTIjRoiZwGqz81VHH9/5M88ezS+aH0JIoFqOXIRHqBwzaI6vwWBwHWEJMVN91dHLw/9Uzn9/zED2g/d4dNvf+fVH9FmsnPjLNI9uV8xMo33nyhn6QviY+vZeAJJiNB7f9vLYUOrbe7FYh58oLcR4SLgI4WManOGywPPhsnRhGDa7g4tX+jy+bTG7SLgI4WPqL19DE+TPwrDJX/blTsm3jobqLvd6fNtidpFwEcLH1Lf3oo/RTMkkl+QFoQDUXbrm8W2L2UXCRQgfYrc7uNDROyXjLQARcwOJDg2i7rKEi5gcCRchfEhL9w36B+xTMt7ilLxA45o0IMR
ESbgI4UPq2wePKJJiQqfsPZIXhNLY2Uf/gMwYExMn4SKED6m/NdCun6JuMRgMF4fjT7PShJgICRchfEh9ey9zAvyInxc8Ze/hGtSXcRcxCRIuQvgQ50wxT15T7E7O8RyZjiwmQ8JFCB/hcDhouHxtSsdbAMLmDN46uV6OXMQkSLgI4SPaevrpu2mb0vEWp+QFodRKuIhJkHARwkc0TOE1xe6UvECDqesGfRbrlL+XmJkkXITwEa4LVi6Y2m4x+NOgvswYExMl4SKEj2hov0agvx+6iKmbKebkDBfpGhMTJeEihI+ov9zLkqi5+Kun/tc28VbX24UOOXIREzOmn9KysjKMRiN6vZ6cnBxstuFn7h49epTk5GQSExPJy8tzvW42m8nIyCApKYnU1FTOnz8PQGVlJevWrcNoNLJixQpeeeUV1zoHDx4kMjKSlJQUUlJSyM3Nnex+CuHTHA6HaxrydNAE+RMbPocL0i0mJshtuNjtdnJycjh27BgNDQ2YzWaOHDkypE1PTw979uyhvLyc2tpaSktLKS8vB2D//v2sXLmS+vp69u3bx+7duwEICQnh9ddfp7q6mo8++ohf/epXVFZWura5ZcsWKisrqaysJD8/34O7LITv6ei10HNjYMqnId9OH6ORMRcxYW7DpaKigri4OAwGAwDZ2dkUFRUNaXPixAnWr19PbGws/v7+7Nixw9WmqKiInJwcADZt2kRdXR2dnZ0kJyezdOlSAMLCwli+fDnNzc0e3TkhZoqGy1N3g7CRJEZraLp6Xa4xJibEbbiYTCZ0Op3reUJCwrAQGK3Nncu0Wi0mk2nI+hcuXOCPf/wj69atc712/PhxVq9ezaOPPkpFRcU4d0uImWUqb208En2MBrsDGjvlrpRi/NyGi8PhcLuRsbQZSXd3N48//jgvv/wy8+fPB+Cxxx6jsbGRzz//nLy8PLZs2UJv7/DD8/z8fAwGg+vR1dU14TqE8GYN7b34+6lYFDl32t4zMVrjem8hxsttuOh0uiFHKk1NTWi12jG30Wq1Q5aZTCbi4+MBuH79Ops3b+aHP/whTzzxhKtNVFQUwcGD0y3T09PRarXU1tYOqy03N5eamhrXIyIiYkw7LYSvqW+/xuKouQT6T98ET+fkAQkXMRFuf1JTU1MxmUzU1NQAUFhYSGZm5pA2GzdupLS0lLa2NqxWK4cOHXK1yczMpKCgAICSkhL0ej1RUVEMDAyQmZnJI488wrPPPjtke62tra5/V1dX09jYyJIlSya3p0L4sIb2qbv75EiiNIGEBwdwoUO6xcT4+btroFarKSgoICsrC4vFQnp6Otu3b6e4uJji4mIKCgoIDw/nwIEDpKWlYbfb2bp1Kxs2bABg7969bNu2jaSkJEJDQzl8+DAwOHX55MmTXLp0iTfffBOAF154gaysLF599VXeeustAgICCAgI4ODBg3JUImatq303udJ7c9qmITupVCqZMSYmTOWYzICJlzEYDK4jLCFmik8vXmXrbz7m5SdT+HZK/LS+909/X8WblS3U7NuIegov8y9802jfuXKGvhBebjpubTwSfYwGi9VOS9eNaX9v4dskXITwcvWXe/FTwZLo6Zsp5pQYM/ieDR1yjTExPhIuQni5hvZeEuaHMCdAPe3vrY+WqyOLiZFwEcLLNbT3olegSwwgPiKYIH8/CRcxbhIuQngxc/8Al8z903rZl9up/VQsiZYZY2L8JFyE8GLTeffJkehjNFzo6JvUlTjE7CPhIoQXc16wcrrPcbmdPlpDz40BrvTeVKwG4XskXITwYs5pyM7rfCnBNWNMusbEOEi4COHF6tt7iZ8XzNwgtxfTmDKua4zJXSnFOEi4COHF6i/3KjaY73RP1Fz8VMhdKcW4SLgI4aX6LFZaum8oOpgPEOSvJmF+iHSLiXGRcBHCS31162rESlz25U5yAUsxXhIuQngp52C+XuFuMYDEGA2XzP30WqxKlyJ8hISLEF7KeWtjJachOzl
nq8m4ixgrCRchvFT95V4WhAURNidA6VLkrpRi3CRchPBSDe3XvGK8BWQ6shg/CRchvFD/gI2mq9e9oksMIGxOADGhQXLkIsZsTOFSVlaG0WhEr9eTk5ODzWYb1ubo0aMkJyeTmJhIXl6e63Wz2UxGRgZJSUmkpqZy/vx5ACorK1m3bh1Go5EVK1bwyiuvuNaxWq3s2rULvV6P0Wjk1KlTk91PIXzKVx192B0ofo7L7fQxGhlzEWPmNlzsdjs5OTkcO3aMhoYGzGYzR44cGdKmp6eHPXv2UF5eTm1tLaWlpZSXlwOwf/9+Vq5cSX19Pfv27WP37t0AhISE8Prrr1NdXc1HH33Er371KyorKwE4ePAgFouFhoYG3njjDbKzs+WieWJWUfLukyPRx2j4+up1blrtSpcifIDbcKmoqCAuLg6DwQBAdnY2RUVFQ9qcOHGC9evXExsbi7+/Pzt27HC1KSoqIicnB4BNmzZRV1dHZ2cnycnJLF26FICwsDCWL19Oc3PzsHVWrVpFZGQkZ8+e9dAuC+H9vOFqyHfSx2iw2R00dvYpXYrwAW7DxWQyodPpXM8TEhJcITCWNncu02q1mEymIetfuHCBP/7xj6xbt27M7wmQn5+PwWBwPbq6utztjhA+oaG9lyhNIBFzA5UuxcU5/lN/WbrGhHtuw2Us3VGT6bLq7u7m8ccf5+WXX2b+/Pnj2l5ubi41NTWuR0RExITrEMKb1Lf3es1gvpOzi87ZZSfEaNyGi06nG3LU0NTUhFarHXMbrVY7ZJnJZCI+Ph6A69evs3nzZn74wx/yxBNPjOs9hZipblrtNF7p87pwidIEMi8kwHVypxCjcRsuqampmEwmampqACgsLCQzM3NIm40bN1JaWkpbWxtWq5VDhw652mRmZlJQUABASUkJer2eqKgoBgYGyMzM5JFHHuHZZ58dsr3b16mqqqKjo4O1a9dOfm+F8AFfd/ZhtTu8ajAfQKVSkRwT6rqBmRCjcRsuarWagoICsrKySExMRKPRsH37doqLi12D7uHh4Rw4cIC0tDSWLl1Keno6GzZsAGDv3r1UVVWRlJTECy+8wGuvvQYMTl0+efIkb775JikpKaSkpPD73/8egJ07dxIQEIBer+fJJ5+ksLAQlUo1VZ+BEF7Fmy77cif9Ag1fXenFapMZY2J0KscMmuNrMBhcR1hC+KqX3q3jpXfr+TTvG8SEzlG6nCH+14cX+bvjNbz7XLpXhp+YXqN958oZ+kJ4mfrLvcwLCSBaE6R0KcM4u+oaZFBfuCHhIoSXqW+/RnJMqFd2BTuvGCDTkYU7Ei5CeJEBm52LV/q86rIvt4sJDSJsjr/MGBNuSbgI4UUar/QxYHN41Zn5t1OpVCQtCJVwEW5JuAjhRepudTclL/Cuaci3S4rRcKGjF5t9xswFElNAwkUIL+K6YKUXh4s+RsNNq52mq9eVLkV4MQkXIbxI/eVeIkICiNJ4zzXF7uQMvvrLMmNMjEzCRQgvUnd58O6T3jhTzCnZOWNMxl3EKCRchPASN63ePVPMaWHYHDRB/nJXSjEqCRchvITzmmLePJgPgzPG9DEauTqyGJWEixBewjlTzNuPXGBwxlhDey92mTEmRiDhIoSXqLvsfbc2HknSAg39A3ZMXTeULkV4KQkXIbxEQ7v3zxRzcs0Yk64xMQIJFyG8RN3layQt8O6ZYk7OKwjIjDExEgkXIbyAc6ZYsg+MtwDEhQcTEqim7pIcuYi7k3ARwgs0eundJ0fi56di6cJQvpRwESMYU7iUlZVhNBrR6/Xk5ORgs9mGtTl69CjJyckkJiaSl5fnet1sNpORkUFSUhKpqamcP3/etWzr1q1ER0ej1+uHbOvgwYNERka67lCZm5s70f0Twic4v6SXLfSNcAFYtjCMhvZeBuSulOIu3IaL3W4nJyeHY8eO0dDQgNls5siRI0Pa9PT0sGfPHsrLy6mtraW0tJTy8nIA9u/fz8qVK6mvr2ffvn3
s3r3btd4zzzzDO++8c9f33bJlC5WVlVRWVpKfnz+ZfRTC651vMwODX9i+YnlsKDdv3SJAiDu5DZeKigri4uIwGAwAZGdnU1RUNKTNiRMnWL9+PbGxsfj7+7Njxw5Xm6KiInJycgDYtGkTdXV1dHZ2AvDwww8zf/58j+6QEL7oyzYzceFzCA8JULqUMXMGoTMYhbid23AxmUzodDrX84SEBJqbm8fc5s5lWq0Wk8nktrDjx4+zevVqHn30USoqKtzviRA+7MtL11gW6ztHLQBLb3XhybiLuBu34eJwuD8DdyxtxuOxxx6jsbGRzz//nLy8PLZs2UJv7/Apj/n5+RgMBtejq6vLo3UIMR26r9+kraef5bG+M94CEB4cQPy8YDlyEXflNlx0Ot2QI5Wmpia0Wu2Y22i12iHLTCYT8fHxo75nVFQUwcHBAKSnp6PVaqmtrR3WLjc3l5qaGtcjIiLC3e4I4XXOtzkH833ryAUGJyB82SZHLmI4t+GSmpqKyWSipqYGgMLCQjIzM4e02bhxI6WlpbS1tWG1Wjl06JCrTWZmJgUFBQCUlJSg1+uJiooa9T1bW1td/66urqaxsZElS5aMb8+E8BFfXhr8y9/XjlwAlsWGcsncT1ffTaVLEV7Gbbio1WoKCgrIysoiMTERjUbD9u3bKS4udg3Uh4eHc+DAAdLS0li6dCnp6els2LABgL1791JVVUVSUhIvvPACr732mmvbmzdv5oEHHqCxsRGtVsuLL74IwKuvvorRaCQlJYWdO3dy8OBBOSoRM9aXbdcI9PdjceRcpUsZt+W3xolk3EXcSeXw9ICJggwGg+sISwhf8e1XP8DugON/8aDSpYxbQ3sv3/xlOf8jw8CudfcoXY6YZqN958oZ+kIoyGZ3UHv5mk+dPHm7xZEhBPn7ybiLGEbCRQgFNXb20T9g97lpyE7+aj+SF4S6xo2EcJJwEUJBzr/4l/vokQsMzhirvXwNm9w4TNxGwkUIBbku++KjRy4wWHv/gFwGRgwl4SKEgqpbe4gNn8P8ud5/g7CRGOMGg7G6tUfhSoQ3kXARQiEOh4NzLWZWxIcrXcqkOMPlnEnCRfyJhIsQCrlstnCl18JKHw+X0DkBLImay7kWCRfxJxIuQijE+WXs6+ECsCI+nOpWM3YZ1Be3SLgIoRBnuBjjfXcw32llfDi9FiuNnTKoLwZJuAihkC9aelgQFkRM6BylS5k057iRdI0JJwkXIRRyrqVnRnSJwZ+Ovqpb5WRKMUjCRQgFXDb303HN4vMzxZzC5gRwT9RcmTEmXCRchFDAFzNoMN9pRXw4X7T2ePzmgcI3SbgIoYCZNFPMaWV8GNf6rXzdeV3pUoQXkHARQgFVpluD+WG+P5jv5Oziq5JBfYGEixDTzuFwcLapizW6mXUDvJXx4ahUUNnUrXQpwgtIuAgxzRo7r9N1fYA1CfOULsWjQucEsHRBKGeaupQuRXiBMYVLWVkZRqMRvV5PTk4ONpttWJujR4+SnJxMYmIieXl5rtfNZjMZGRkkJSWRmprK+fPnXcu2bt1KdHQ0er1+yLasViu7du1Cr9djNBo5derURPdPCK9z9taX79pFM+vIBWBNQgTVrT30Dwz/jhCzi9twsdvt5OTkcOzYMRoaGjCbzRw5cmRIm56eHvbs2UN5eTm1tbWUlpZSXl4OwP79+1m5ciX19fXs27eP3bt3u9Z75plneOedd4a958GDB7FYLDQ0NPDGG2+QnZ0tM1DEjHGmqQt/PxUr4mbOYL7T2oR5DNgccoVk4T5cKioqiIuLw2AwAJCdnU1RUdGQNidOnGD9+vXExsbi7+/Pjh07XG2KiorIyckBYNOmTdTV1dHZ2QnAww8/zPz584e95+3rrFq1isjISM6ePTuJ3RTCe5xt6mZ5bBjBgWqlS/E459HYma+7lS1EKM5tuJhMJnQ6net5QkICzc3NY25z5zKtVovJZJr0ewrhi67ftPLlpWusnWHjLU5LouY
yLyRAxl0E/u4ajKU7ytNdVmPdXn5+Pvn5+a7nXV3yAy28W5WpB5vdwZqEmTfeAqBSqVijm8eZpi4cDgcqlUrpkoRC3B656HS6IUcNTU1NaLXaMbfRarVDlplMJuLj4yf9ngC5ubnU1NS4HhERM/MXVswcZ29N0107Q8MFBvftstlCa0+/0qUIBbkNl9TUVEwmEzU1NQAUFhaSmZk5pM3GjRspLS2lra0Nq9XKoUOHXG0yMzMpKCgAoKSkBL1eT1RU1Kjvefs6VVVVdHR0sHbt2vHvnRBe5tOLnURpgtDND1a6lCnjHHf57GvpSZjN3IaLWq2moKCArKwsEhMT0Wg0bN++neLiYtege3h4OAcOHCAtLY2lS5eSnp7Ohg0bANi7dy9VVVUkJSXxwgsv8Nprr7m2vXnzZh544AEaGxvRarW8+OKLAOzcuZOAgAD0ej1PPvkkhYWFcngtfJ7N7uCPjV3cv2T+jP55TtHNQ+2nouLiVaVLEQpSOWbQHF+DweA6whLC25wz9ZDx6gf8z8dXsP3+RUqXM6W2vPYhvf1WTj6XrnQpYgqN9p0rZ+gLMU1OXxycgn//PcOn38809y+JpL69lyu9FqVLEQqRcBFimnzy1VUi5waij9EoXcqUu39JJACffNWpcCVCKRIuQkwDm93Bpxc7ufeemT3e4pS6KAK1n0rCZRaTcBFiGnx5yYy538p9s6BLDGBukD+rtOF88pUM6s9WEi5CTAPnl+x9t7qLZoP7l0TS0N5LxzUZd5mNJFyEmAbv13UQpQli6YJQpUuZNs5xl48uXFG4EqEECRchplj/gI3TFztJS4rCz2/mj7c43bt4PoH+fpTXdShdilCAhIsQU6yi8Sr9A3bSl0YrXcq0Cg5Uc/+SSN6v68BunzGn04kxknARYoqV13agUsGD+tEvezQTrU+O5krvTapbzUqXIqaZhIsQU+z9+g5WxocTqQlSupRpt/7W0VpZbbvClYjpJuEixBRq67lB3eVe0pJmV5eY0z1Rc0mYH0KZjLvMOhIuQkyhkzWXAdiwLEbhSpShUqnYsDSas01ddF+/qXQ5YhpJuAgxhd6pvkRMaBBrdPOULkUx65fFYHdAqXSNzSoSLkJMke7rN/nkq6s8Ylgwq6Yg3+nPEyMJnePPf5y7pHQpYhpJuAgxRf7zfDs2u4P/alyodCmKCvJX883lCyiv66DPYlW6HDFNJFyEmCLvVF8idI6/60z12ey/rViIxWqXrrFZRMJFiClwrX+A8roOvrEshkB/+TVLS45mbqBausZmkTH91JeVlWE0GtHr9eTk5GCz2Ya1OXr0KMnJySQmJpKXl+d63Ww2k5GRQVJSEqmpqZw/f97tOj/72c+IjY0lJSWFlJQU/v7v/34y+yjEtPuPLy5hsdr59pp4pUvxCnMC1Dy8fAHvfdkuXWOzhNtwsdvt5OTkcOzYMRoaGjCbzRw5cmRIm56eHvbs2UN5eTm1tbWUlpZSXl4OwP79+1m5ciX19fXs27eP3bt3u10HIDc3l8rKSiorK4cEjxC+4N/PtBClCeShWXhW/kgeT4njxoCN//hCjl5mA7fhUlFRQVxcHAaDAYDs7GyKioqGtDlx4gTr168nNjYWf39/duzY4WpTVFRETk4OAJs2baKuro7Ozs5R1xHCl7V23+CTi51krI7DXy1dYk5pydFEaYL4/WfNSpcipoHbn3yTyYROp3M9T0hIoLm5ecxt7lym1WoxmUxut/sv//IvrFq1ii1btlBfX3/X2vLz8zEYDK5HV1eXu90RYsq9VdmKwwGZa7RKl+JVAtR+bFkTxydfXaX56nWlyxFTzG24OBzur2Y6ljbjWeeZZ57hwoULVFVV8eSTT5KRkXHXdrm5udTU1LgeERER465DCE+y2x38rqKJpBgNK+LDlC7H63znzwYD9/efmRSuREw1t+Gi0+mGHFE0NTWh1WrH3Ear1Q5ZZjKZiI+PH3WdhQsXEhAQAMB3v/tduru76ey
Ue3EL73eq4QqNndfZ/sAiVKrZe+LkSJYtDGOVNpyjf2xmwGZXuhwxhdyGS2pqKiaTiZqaGgAKCwvJzMwc0mbjxo2UlpbS1taG1Wrl0KFDrjaZmZkUFBQAUFJSgl6vJyoqatR1WltbXdsuLS0lICCA+fNnx73HhW87/HEjcwPVbJFZYiPa+eeLaevp54QM7M9o/u4aqNVqCgoKyMrKwmKxkJ6ezvbt2ykuLqa4uJiCggLCw8M5cOAAaWlp2O12tm7dyoYNGwDYu3cv27ZtIykpidDQUA4fPgww6jrPP/88Z8+eRa1WExoayr/927/JX4HC6zVfvc5/ftnOtvsSCJ0ToHQ5Xmvzqlh+XvIlr394kYzVcUqXI6aIyjGRARMvZTAYXEdYQky3v33rC3778de8+1wa+phQpcvxai+/W88/v1vHv+/+c9YkyFiprxrtO1fmSQrhAe3X+vnfFc1sNC6UYBmDbfcnEOTvx2tlF5QuRUwRCRchPKDw1EVuWu3kbtArXYpPiNIEse2+RZysuUyVqVvpcsQUkHARYpIu9fRz6ONG0pOjWakNV7ocn/Gj9YnMCfDjn0/WKV2KmAISLkJM0i9P1mKx2tm7canSpfiU6NAgdjywmNLaDj69eFXpcoSHSbgIMQk1rWaOfWbiO2u1GOPkqGW8frQ+kYiQAP72rS+wynkvM4qEixATZLM7+Jt/P0dwgJo9j8pRy0TMCwnkpxuX8eWla/z246+VLkd4kISLEBP0vz68yOfN3fx04zIWhs9RuhyftTVVx2rdPH55sk6uOTaDSLgIMQF1l6/xiz/Ukroogu33L1K6HJ/m56fiQNYqBmx2fvK/z0r32Awh4SLEOPVZrPzoyGcE+PnxiydW4+cnV4+YrOQFofx/m5Zzpqmbl969+1XQhW+RcBFiHGx2B88dreRCRx8HnljF4qi5Spc0Y3z/gUU8YljAq6UNvFXZonQ5YpIkXIQYI4fDwc+Kq3mn+jI/3qBn44pYpUuaUVQqFf/83RSWx4bx18eq+PiCXAndl0m4CDEGdvtgsBz+5Guy/kzLXz2arHRJM5ImyJ/Xd6YSpQlk18FP+aD+itIliQmScBHCjf4BG//v0UoOffw1mWvj+YfMlXKV7ikUGx7M755+gChNED84VMGbZ6WLzBdJuAgxiotX+tjy2ke8VdnKD9bdwy+yVuOvll+bqaabH8LRpx8gMVrDX/6ukp8VV9M/YFO6LDEO8lsixF3ctNrJL21g40vvc/FKL//83dX8bYZBZoZNo7h5wfzbj/6cb6fEcfCjRv7by6f46IJ0k/kKtzcLE2I2sVht/P4zE6+VXqCl+wYPLInk77esYEm0RunSZqXgQDUvfTeFRw0L+R/FX/C9//80acnR/OQbSaxNmCfdk15sTEcuZWVlGI1G9Ho9OTk52GzDD0+PHj1KcnIyiYmJ5OXluV43m81kZGSQlJREamoq58+fn9Q6Qnia3e6gytTN3x2v5v6f/yd5//4FcwL8ePnJFP71h/dJsChMpVKxeVUs//ncenavT6Ti4lW+8+uP2PzKB/z240Yu9fQrXaK4C7d3orTb7SQnJ1NcXIzBYGDr1q1s3ryZHTt2uNr09PSwcuVKTp8+TXR0NGlpabz44oukp6fzwgsvYLfb+fnPf05JSQkHDhygtLR0Quu4I3eiFGNhsdqovXSNcy09fPZ1F+/XXeFKrwWVCtYlRvH/3JvAxhULUUsXmFe60mvhdxXNvPFpE6auGwAY48JYp49itXYeKQnziAufI0c102C071y34XL69Gn++q//mvfffx+Ad955h/z8fIqLi11tfve73/H222/z29/+FoDf/OY3VFdX88orr7B8+XLefvttlixZAkB8fDxVVVW8++67414nMjJywjsqZiaHw8GAzYHFasNitdM/YKPnxgA91wfovjFA1/WbdPXdpKX7Bs1Xb9DcdZ2WrhtY7YM/9kH+fty/JJL1S6N5xLAAbUSIwnskxspud/B
ZUxfvfdnOe+fbqb18zbVsbqCaRZFzWRwVQvy8YKI0QURqgojUBBIREsjcQDXBgWpCAv0JCVQT5O8nYTQBo33nuh1zMZlM6HQ61/OEhASam5vdtjlx4sRdl2m1Wkwm04TWcRcuE2WzO3jyXz52Pb8zbm9/emcW35nMo61758LR1nXcsXTYdoe0vXPZyH8vDK9v5Pdxt13HiE9GX9f9Z+YYeZkDLFY7FquNm1Y7FuvYrkMVqPYjPiKYhPkhPJQUhTEunJXx4SQvCCXQX+a1+CI/PxX/ZfF8/svi+fx04zJ6bgxwztTD56ZuLnT08nXndU5/dZXOvpvut6WCIH81/n4q1GoV/n4q/P38UPup8FerBv/rp8LvVgA5g0gFODNJpQIVqtv+/aeFQ9qNsL4Svrl8AU+nJ07Jtt2Gi5sDmzG38cQ6d8rPzyc/P9/1vKura8LbunHHNEfnD4nruer2ZXe446dDdce/XYtVd26VYX8tDVn3jsbD1h6lptHWHbZsHG3vdHv97mu4+3ru1x1aT6C/H0Gux+BfnYG3PQ8PDiA8JIB5wQFEzA1kXnAAUZogmek1w4UHB/BgUhQPJkUNed1itXG17yadvTe50muh6/pNrt+0ceOmjT6LjesDVm7ctGEZsGNzOLDa7FjtDmx2xx3/tWOzO1x/8Dhc/zP4B9Htrzscf/oTyeFwvjb4Pw7nGo7hf9xNN+cR/FRwGy46nW7IkUpTUxNarXZYm6qqqru20Wq1NDc3u7q4TCYT8fHxE1rnTrm5ueTm5rqeGwwG93t8F2o/Ff/nLx6a0LpCCO8W5K8mNjyY2PBgpUuZVdz2B6SmpmIymVz9aoWFhWRmZg5ps3HjRkpLS2lra8NqtXLo0CFXm8zMTAoKCgAoKSlBr9cTFRU1oXWEEEL4BrdHLmq1moKCArKysrBYLKSnp7N9+3aKi4spLi6moKCA8PBwDhw4QFpaGna7na1bt7JhwwYA9u7dy7Zt20hKSiI0NJTDhw8DTGgdIYQQvsHtbDFfIrPFhBBi+oz2nSvTZIQQQnichIsQQgiPk3ARQgjhcRIuQgghPG5GDeiHhYUNOwfH23R1dREREaF0GV5JPpuRyWczMvlsRjeVn4/JZMJsNt912YwKF18gM9pGJp/NyOSzGZl8NqNT6vORbjEhhBAeJ+EihBDC4yRcptnt10ITQ8lnMzL5bEYmn83olPp8ZMxFCCGEx8mRixBCCI+TcBFCCOFxEi7TpLm5mW984xssX74co9HI3/zN3yhdklfKzc3F39/txbpnlb6+Pnbs2MHSpUtZtmwZv/nNb5QuyWscOXKEVatWkZKSwkMPPURtba3SJSnmJz/5CVqtdtjvz/PPP49eryc5OZmioqLpK8ghpkVra6ujoqLC4XA4HBaLxfHggw863nzzTYWr8i7vv/++4/vf/75DrVYrXYpXefrppx3/+I//6HA4HA673e64fPmywhV5h76+Psf8+fMdHR0dDofD4fj1r3/tyMrKUrgq5Zw6dcrR1tY25Pfn5MmTjoceeshhtVodJpPJodPpHNeuXZuWeuTIZZrExsaSmpoKQGBgIGvWrKGpqUnhqryHxWLh+eef5xe/+IXSpXiVa9euUVxczHPPPQcM3h46JiZG4aq8g91ux+Fw0NvbC0BPTw+xsbEKV6WcBx98kIULFw55raioiJ07d6JWq4mPj2fdunX84Q9/mJZ6pP9BAVevXuXNN9+ctv+TfcG+ffvIzs4mOjpa6VK8yldffcWCBQv48Y9/zKeffsqiRYt46aWXWLRokdKlKU6j0fDqq6+yYsUKwsPDCQ8P5+OPP1a6LK9iMpmG3Dk4ISFhyG3rp5IcuUyzmzdvkpWVxU9+8hOWLVumdDleoaqqitOnT7Nr1y6lS/E6VquVyspKsrKyOHPmDBkZGfzgBz9QuiyvMDAwwGuvvUZFRQUtLS1kZWXx05/+VOmyvIpDwTNNJFymkc1m43vf+x4pKSn81V/9ldLleI0
PP/yQmpoa7rnnHhYvXozNZmPx4sUjXhBvNtFqtURGRvLNb34TgCeffJLPPvtM4aq8Q2VlJQ6Hg+XLlwODn81HH32kcFXeRafTDTlSaWpqmraL+0q4TKOnnnqK0NBQ/umf/knpUrzKj370I1pbW2lsbKSxsRG1Wk1jYyNhYWFKl6a4BQsWYDQaOXPmDAAnT57EaDQqXJV30Gq11NbW0tLSAgx+NgaDQeGqvEtmZiYHDx7EZrPR0tLCBx98wKOPPjot7y1jLtPkww8/5PXXX2fFihWsWbMGgB/84Ac8++yzClcmvN2vf/1rsrOz6evrY968eRQUFChdkleIjY3lH/7hH3jkkUcICAggOjqa119/XemyFPP000/z9ttvY7PZ0Gq1fPvb3yY/P5+TJ0+SnJyMn58fv/zlLwkNDZ2WeuTyL0IIITxOusWEEEJ4nISLEEIIj5NwEUII4XESLkIIITxOwkUIIYTHSbgIIYTwOAkXIYQQHifhIoQQwuP+L/4kAKwhJqNDAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 394;\n", + " var nbb_unformatted_code = \"prior = surv_model.make_pmf()\\nprior.plot()\";\n", + " var nbb_formatted_code = \"prior = surv_model.make_pmf()\\nprior.plot()\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "prior = surv_model.make_pmf()\n", + "prior.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Update It" + ] + }, + { + "cell_type": "code", + "execution_count": 384, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2.000000e+03\n", + "mean 2.753817e-03\n", + "std 8.012930e-03\n", + "min 3.162278e-10\n", + "25% 3.578765e-08\n", + "50% 4.050083e-06\n", + "75% 4.583447e-04\n", + "max 5.187022e-02\n", + "dtype: float64" + ] + }, + "execution_count": 384, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 384;\n", + " var nbb_unformatted_code = \"ps = 1 / np.power(10, prior.qs)\\ndescribe(ps)\";\n", + " var nbb_formatted_code = \"ps = 1 / np.power(10, prior.qs)\\ndescribe(ps)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", 
+ " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ps = 1 / np.power(10, prior.qs)\n", + "describe(ps)" + ] + }, + { + "cell_type": "code", + "execution_count": 385, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "182500000" + ] + }, + "execution_count": 385, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 385;\n", + " var nbb_unformatted_code = \"words_per_day = 10_000\\ndays = 50 * 365\\ndays * words_per_day\";\n", + " var nbb_formatted_code = \"words_per_day = 10_000\\ndays = 50 * 365\\ndays * words_per_day\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "words_per_day = 10_000\n", + "days = 50 * 365\n", + "days * words_per_day" + ] + }, + { + "cell_type": "code", + "execution_count": 386, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2.000000e+03\n", + "mean 2.733730e-10\n", + "std 5.653541e-10\n", + "min 0.000000e+00\n", + "25% 0.000000e+00\n", + "50% 0.000000e+00\n", + "75% 5.216238e-11\n", + "max 2.015761e-09\n", + "dtype: float64" + ] + }, + "execution_count": 386, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 386;\n", + " var nbb_unformatted_code = \"from scipy.stats import nbinom\\n\\nk = days * words_per_day\\nn = 1\\n\\nlikelihood = nbinom.pmf(k, n, 
ps)\\ndescribe(likelihood)\";\n", + " var nbb_formatted_code = \"from scipy.stats import nbinom\\n\\nk = days * words_per_day\\nn = 1\\n\\nlikelihood = nbinom.pmf(k, n, ps)\\ndescribe(likelihood)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from scipy.stats import nbinom\n", + "\n", + "k = days * words_per_day\n", + "n = 1\n", + "\n", + "likelihood = nbinom.pmf(k, n, ps)\n", + "describe(likelihood)" + ] + }, + { + "cell_type": "code", + "execution_count": 387, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1.3558684208624722e-11" + ] + }, + "execution_count": 387, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 387;\n", + " var nbb_unformatted_code = \"posterior = prior * likelihood\\nposterior.normalize()\";\n", + " var nbb_formatted_code = \"posterior = prior * likelihood\\nposterior.normalize()\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "posterior = prior * likelihood\n", + "posterior.normalize()" + ] + }, + { + "cell_type": "code", + "execution_count": 388, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 388, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAAZAAAADqCAYAAACMcRPfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjguMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/SrBM8AAAACXBIWXMAAAuJAAALiQE3ycutAAAmkUlEQVR4nO3de3SU52Hn8e9oRiChOxISkmYEtm5mBDauZaeuXS65tCxe4kRHIXEpVW2NvSnKSXLWlLYHztkemj3ZwjabphG7boVb1ngvEG0cJSEkTiLk+BJHsU1ILBBgB6QBcZNGGnTXXPYPMRNksGY0GmnmHX6fc+Yca97nnfd5B6wfz/O8z/OY/H6/HxERkRlKinUFRETEmBQgIiISEQWIiIhERAEiIiIRUYCIiEhEFCAiIhIRS6wrMFOZmZlYrdZYV0NE5I7gdDpxu923PWa4ALFarXR0dMS6GiIidwS73f6hx9SFJSIiEVGAiIhIRBQgIiISEQWIiIhERAEiIiIRUYCIiEhEFCAiknDeuzrIP7/yHlfco7GuSkIz3DwQEZHpDAxP8Nnn3uDa4Dj/7+0LtHzhURZY9G/luaBvVUQSyv9u7+La4DgbVy3l1KXr/OA3PbGuUsJSgIhIQvnury5yd14aX//s/SxOW8BL71yIdZUSlgJERBLGleujvHvRzYaVS1lgSWJ9ZT6vvdfL8Lgn1lVLSAoQEUkYb51zAfCRu3MBWH/PEsY9PtpvvC/RpQARkYTRfs5Fkgl+ryQbgOpliwE43tUfu0olMAWIiCSMjp4B7spLIyMlGYClWSkUZqXwTrdaIHNBASIiCcHv99N56TqVSzOmvL/als07Xf34/f4Y1SxxKUBEJCFcGxzHNTxBRcHUAFlZnMXAyAQ9A5pUGG0KEBFJCKcvXweg8gMBEgiUwHGJHgWIiCSEMzcCorwgfcr7FTd+VoBEnwJERBJCV98IJhPYFi+a8r4tZxEpyUmcvjwYo5olLgWIiCSErr5hlmamsNBinvJ+UpKJsvz0YAtFokcBIiIJwekaxpaz6LbHKvIzOHNlUE9iRVlYAXLs2DGqqqooKyvD4XDg9XpvKXPo0CEqKiooLS1l586dwffdbjebNm2ivLyc6upqTp48GTx2+fJlHn/8ce655x4qKyv53ve+F4VbEpE7jd/vp7tvGOvi1NseX56XxvC4l6uDY/Ncs8QWMkB8Ph8Oh4PDhw9z9uxZ3G43Bw8enFJmYGCA7du309bWRmdnJ62trbS1tQGwZ88eVq1axZkzZ9i9ezfbtm0LnldXV8eWLVs4deoU7777Lg8//HCUb09E7gR9Q+MMjXspWXz7Fsiy3Mn3u3qH57NaCS9kgLS3t1NUVITdbgegvr6e5ubmKWWOHj3KunXrKCwsxGKxUFdXFyzT3NyMw+EAYOPGjZw+fZre3l46Ozu5fPkymzdvBsBisZCbmxvVmxORO0O3awTgQ7uwAsFyXgESVSEDxOl0YrPZgj+XlJTQ3d0ddpkPHrNarTidTk6dOkV+fj5PPPEE999/P3/2Z39GX1/fLddvbGzEbrcHXy6XliQQkam6+yaD4YNPYAUsz00D4HyfAiSaQgZIOINOkQxMeTweXnnlFXbu3Mk777zD8uXL+cu//MtbyjU0NNDR0RF85eTkzPhaIpLYLt2YZV6UnXLb49mLkslIsXC+d2g+q5XwQgaIzWab0uLo6urCarWGXcZqtU455nQ6KS4uxmazYbfbWblyJQCf+9zneOutt2Z3NyJyR7p0Y+/z/IzbB4jJZGJZ7iJ1YUVZyACprq7G6XTS0dEBwP79+6mpqZlSZsOG
DbS2ttLT04PH4+HAgQPBMjU1NTQ1NQFw5MgRysrKyMvLo7q6mvHx8WC4vPzyy1RVVUX15kTkznDZPUpu2oJp9z5ftjiNLnVhRZUlVAGz2UxTUxO1tbWMjY2xdu1atm7dSktLCy0tLTQ1NZGVlcXevXtZs2YNPp+PzZs3s379egB27NjBli1bKC8vJyMjgxdeeAGApKQk9u3bx+OPP47H46G4uJjnn39+bu9WRBLSZfco+Zm3b30ElOQu4vu/7uH66ERwuXeZHZPfYDNr7HZ7sDUkIgKwZk8rpUvS+NcnH/rQMi++eZ6d3/4NP/zymluWfJcPN93vXM1EFxFD8/v9XHKPUhCiBVKUPTnJ8GL/yHxU646gABERQxsYmWDc4wsZIMU3AuSCAiRqFCAiYmiBJ7BCBUhh1uRxtUCiRwEiIoZ22T25vtXSrIXTlstISSYzxaIAiSIFiIgY2uWB6eeA3KwoO5WL/draNloUICJiaJfD7MKCyXEQjYFEjwJERAytd2gckwkWpy0IWbYoO5XL7lG8PkPNXohbChARMbRrg2MsXrQAc5IpZNmi7FQ8Pj9Xr2tfkGhQgIiIofUNjZObHrr1Ab9bbFHdWNGhABERQ+sdHCc3bfonsAKKNZkwqhQgImJovUNjM2iBaDJhNClARMSwfD7/ZBdWGAPoAPkZCzGZfrd/iMyOAkREDKt/ZAKfH3LTw+vCspiTyEtfGHz0V2ZHASIihtU7OPk0VbhdWABLM1MUIFGiABERw7o2OA4Q9iA6TE44DCx/IrOjABERw+odmgyCvBm0QAoyJ7uwfJpMOGsKEBExrL6hyRZIOLPQA5ZmpuDx+ekbHp+rat0xFCAiYljBLqwwB9EBCm4s664nsWZPASIihtU7OEay2URmiiXsc5beWHRRA+mzF1aAHDt2jKqqKsrKynA4HHi93lvKHDp0iIqKCkpLS9m5c2fwfbfbzaZNmygvL6e6upqTJ08Gj5lMJlavXh189fb2RuGWROROEZiFbjKFXgcroCAYIBpIn62QAeLz+XA4HBw+fJizZ8/idrs5ePDglDIDAwNs376dtrY2Ojs7aW1tpa2tDYA9e/awatUqzpw5w+7du9m2bVvwPLPZzPHjx4Ov3NzcKN+eiCSymcxCDwi0QC6pBTJrIQOkvb2doqIi7HY7APX19TQ3N08pc/ToUdatW0dhYSEWi4W6urpgmebmZhwOBwAbN27k9OnTammISFT0Do3PaAAdIDPVQkpyUnAjKolcyABxOp3YbLbgzyUlJXR3d4dd5oPHrFYrTqcTmGzdPPjggzzwwAN87Wtfu+31GxsbsdvtwZfL5ZrB7YlIIpvswppZgJhMJgoyU9QCiYKQI09+f+hnpcMpczvnz5/HZrPR29vLpz71KQoLC3niiSemlGloaKChoSH4c6AlJCJ3Nq/Pj3t0guxFMwsQCEwmVIDMVsgWiM1mm9Li6Orqwmq1hl3GarVOOeZ0OikuLg6eB5Cbm8uWLVt4/fXXZ3ErInIncY9M4PdDTgQBouVMoiNkgFRXV+N0Ouno6ABg//791NTUTCmzYcMGWltb6enpwePxcODAgWCZmpoampqaADhy5AhlZWXk5eXhcrkYHZ38AxwdHaWlpYV77703qjcnIomrf2QCgOxFyTM+d2lWCq7hCUYnbn2iVMIXMkDMZjNNTU3U1tZSWlpKeno6W7dupaWlJTg4npWVxd69e1mzZg2VlZWsXbuW9evXA7Bjxw5OnDhBeXk5u3btYt++fQCcOnWKBx98kPvuu48HHniAVatWUV9fP4e3KiKJxHVjJnkkAZKfMTnxUFvbzo7JH+kARozY7fZga0hE7lytp67w5L+1c+Cph1hbsWRG537vxEW+8L/e4fDnH+bB5YvnqIaJYbrfuZqJLiKGFGiB5ETShZWp5UyiQQEiIobkGp4cA4lkEL1Ay5lEhQJERAxp4EYLJCuSMZDMyTEQBcjsKEBExJBcwxOYk0xkLAx/IcWA
hRYzi9MWaD2sWVKAiIgh9Y9MkJ2aPKOFFG+Wn7FQs9FnSQEiIobUPzwe0SO8AUuzUriiAJkVBYiIGFL/cGTLmAQUZEyuh2WwmQxxRQEiIobkGh6P6BHegILMhYxO+HCPeqJYqzuLAkREDKl/eIKs1Fm0QG5sbaturMgpQETEcCa8PgbHPLNrgWRoY6nZUoCIiOH0D0e+kGKAtradPQWIiBjOwEhgIcXZdGFpMuFsKUBExHBms4xJQG7aQsxJJgXILChARMRwotGFZU4yTU4m1IKKEVOAiIjhzGYvkJvlZ6ZwWXuCREwBIiKGMxBsgUTehQVQkLFQj/HOggJERAxnNnuB3GxpVgpXro/h9Wk2eiQUICJiOK7hCRaYk0hNNs/qcwoyU/D6/PQOqRsrEmEFyLFjx6iqqqKsrAyHw4HXe+tG9IcOHaKiooLS0lJ27twZfN/tdrNp0ybKy8uprq7m5MmTt5z72GOPUVZWNovbEJE7ycDI5EKKka7EGxCcCzKgAIlEyADx+Xw4HA4OHz7M2bNncbvdHDx4cEqZgYEBtm/fTltbG52dnbS2ttLW1gbAnj17WLVqFWfOnGH37t1s27ZtyrkvvvgiixdrT2IRCZ9raGLWA+gwuR4WaC5IpEIGSHt7O0VFRdjtdgDq6+tpbm6eUubo0aOsW7eOwsJCLBYLdXV1wTLNzc04HA4ANm7cyOnTp+nt7QXg2rVrNDY2TmmxiIiE0j8yu5V4A4ItkOsKkEiEDBCn04nNZgv+XFJSQnd3d9hlPnjMarXidDoB+PKXv8xXvvIVUlJSZncXInJH6R8eJzs1Gi2QQBeWAiQSIQMknLXyI1lP/wc/+AFms5mPfvSj05ZrbGzEbrcHXy6Xa8bXEpHE0j88MatZ6AGZKRZSkpO0HlaEQgaIzWab0uLo6urCarWGXcZqtU455nQ6KS4u5pVXXuEnP/kJy5cv59FHH+X8+fPce++9t1y/oaGBjo6O4CsnJ2fmdykiCWN0wsvIhJfstNm3QEwmEwWZKVqRN0IhA6S6uhqn00lHRwcA+/fvp6amZkqZDRs20NraSk9PDx6PhwMHDgTL1NTU0NTUBMCRI0coKysjLy+Pr371qzidTs6dO8err77KsmXLOHHiRLTvT0QSTHAZk1nsBXKzgswUDaJHKGSAmM1mmpqaqK2tpbS0lPT0dLZu3UpLS0twcDwrK4u9e/eyZs0aKisrWbt2LevXrwdgx44dnDhxgvLycnbt2sW+ffvm9o5EJKH1j0RnEmGAAiRyJr/BNgS22+3B1pCI3HneeK+XJ/7l5/yPP/09NqwsnPXn/efvd/AvP/stnV/ZwELL7CYmJqLpfudqJrqIGEo09gK5WeBJrCsaSJ8xBYiIGIorCku53yw/uDOhurFmSgEiIobSH4XNpG62VFvbRkwBIiKG0n9jJd6sKEwkhN8tZ6JHeWdOASIihtI/PEFqspmUWa7EG/C7MRAFyEwpQETEUFzD41Eb/wBISTaTlZqsMZAIKEBExFD6h6OzkOLNCjIXqgsrAgoQETGU/pHxqE0iDCjITNFjvBFQgIiIobiGo7MXyM0C62EZbF51zClARMQw/H4/A3PUhTU87mVwzBPVz010ChARMYzhcS/jXl9U9gK5meaCREYBIiKG0T8S3UmEAZqNHhkFiIgYhmvoxiTCKI+BLFWAREQBIiKGEe1lTAICkwn1KO/MKEBExDCivRdIQF76ApJMWpF3phQgImIY0V6JN8BiTiIvfSGXBtQCmQkFiIgYxsBwdPcCuVlBZgqXrytAZkIBIiKGEWiBRGsl3ptpNvrMKUBExDBcw+NkLLSQbI7+r66CzIVcdo/i82k2erjC+lM4duwYVVVVlJWV4XA48Hq9t5Q5dOgQFRUVlJaWsnPnzuD7brebTZs2UV5eTnV1NSdPngRgaGiIhx56iNWrV1NVVcUzzzyDx6NZoCLy4QaGJ6L+CG9AQWYKHp+fvhvdZBJayADx+Xw4HA4O
Hz7M2bNncbvdHDx4cEqZgYEBtm/fTltbG52dnbS2ttLW1gbAnj17WLVqFWfOnGH37t1s27YNgNTUVH76059y/Phxfv3rX3Pt2rVbPldE5Gau4fGoP8IbsDRr8lHenn6Ng4QrZIC0t7dTVFSE3W4HoL6+nubm5illjh49yrp16ygsLMRisVBXVxcs09zcjMPhAGDjxo2cPn2a3t5ekpKSSE9PB8Dj8TA2NobJZIrqzYlIYukfif5CigHF2akAXBwYmZPPT0QhA8TpdGKz2YI/l5SU0N3dHXaZDx6zWq04nc7gzx/5yEdYsmQJmZmZbNmyJfI7EZGENxd7gQQUBQKkXwESrpABEs7yxrNZAvnNN9/kwoUL9PX1cezYsVuONzY2Yrfbgy+XyxXxtUTEuHw+P/3D0d8LJKDwRheWAiR8IQPEZrNNaXF0dXVhtVrDLmO1WqccczqdFBcXTzk/PT2dT37yk3z3u9+95foNDQ10dHQEXzk5OWHemogkkutjHnx+or4Sb0BKspm89AVcUICELWSAVFdX43Q66ejoAGD//v3U1NRMKbNhwwZaW1vp6enB4/Fw4MCBYJmamhqampoAOHLkCGVlZeTl5XHlyhX6+/sBGBsb48iRI1RVVUXz3kQkgfTP4STCgKLsVC5oED1sllAFzGYzTU1N1NbWMjY2xtq1a9m6dSstLS20tLTQ1NREVlYWe/fuZc2aNfh8PjZv3sz69esB2LFjB1u2bKG8vJyMjAxeeOEFAC5evMiTTz6J1+vF6/WycePG4GC7iMgH9c/RMiY3K85O5Zfn1U0eLpPfYHs42u32YGtIRO4cxzqv8Of/2s6//vmDrL8nf06u8Xff62D/q7+l8ysbWGgxz8k1jGa637maiS4ihjAwMvctkMCTWFpUMTwKEBExhMBmUnM5BlKcPfkklgbSw6MAERFDcAU3k5r7FshFDaSHRQEiIoYwMDKByQQZKXMfIBdcaoGEQwEiIobgGh4nKzUZc9LcLXmUm7aAhZYkTSYMkwJERAyhf3hiziYRBphMJoqzU7UeVpgUICJiCP3D43M6gB4wOZlQARIOBYiIGEL/yMScDqAHFGWncLF/ZFZr/N0pFCAiYgiuoflpgRRnL2J0wkfvkDaWCkUBIiJxz+P14R71zOkkwoCS3Mknsbr6huf8WkanABGRuBeYA5KbNvctkJLFiwDoVoCEpAARkbjXd6M7KWceAsR2I0C6ehUgoShARCTu9Q6NAfPTAlmSvpDUZDPn1QIJSQEiInHPNTTZhbU4beGcX8tkMlGyeJHGQMKgABGRuNd3owWyeB5aIDDZjaUxkNAUICIS9wKP1M5HFxbAstxFXHKPMjrhnZfrGZUCRETiXt/QOOYkE1lzvJRJQMniRfj94NSiitNSgIhI3OsdGidnUTJJc7iQ4s30KG94FCAiEvf6BsfnbfwDoCR3MkDO9w7N2zWNKKwAOXbsGFVVVZSVleFwOPB6b+0XPHToEBUVFZSWlrJz587g+263m02bNlFeXk51dTUnT54E4Pjx4zzyyCNUVVWxcuVKvvGNb0TplkQk0biGx8mZh2VMAoqzUzGZoKtPXVjTCRkgPp8Ph8PB4cOHOXv2LG63m4MHD04pMzAwwPbt22lra6Ozs5PW1lba2toA2LNnD6tWreLMmTPs3r2bbdu2AbBo0SKef/553n33XV5//XX+6Z/+iePHj0f/DkXE8HqHxslNn78ASUk2szQzRY/yhhAyQNrb2ykqKsJutwNQX19Pc3PzlDJHjx5l3bp1FBYWYrFYqKurC5Zpbm7G4XAAsHHjRk6fPk1vby8VFRVUVlYCkJmZyYoVK+ju7o7qzYmI8fn9flxD89uFBZPjIOfUhTWtkAHidDqx2WzBn0tKSm75RT9dmQ8es1qtOJ3OKee/9957/PKXv+SRRx655fqNjY3Y7fbgy+VyhXlrIpII3CMePD7/vEwivNndS9I53zuEx+ub1+saScgACWdN/Nmsm9/f
38+nPvUp/vEf/5HFixffcryhoYGOjo7gKycnJ+JriYjxzOcyJjcrXZLGhNevR3mnETJAbDbblBZHV1cXVqs17DJWq3XKMafTSXFxMQDDw8M89thjPP3003zmM5+Z3Z2ISEJyDU9OIpzvLqzS/HQA3rs6OK/XNZKQAVJdXY3T6aSjowOA/fv3U1NTM6XMhg0baG1tpaenB4/Hw4EDB4JlampqaGpqAuDIkSOUlZWRl5fHxMQENTU1fOITn+CLX/xitO9LRBJE7+D8zkIPKM2bDJD3r2oc5MOEDBCz2UxTUxO1tbWUlpaSnp7O1q1baWlpCQ6OZ2VlsXfvXtasWUNlZSVr165l/fr1AOzYsYMTJ05QXl7Orl272LdvHzD52O/LL7/MSy+9xOrVq1m9ejXf+ta35vBWRcSI5nMp95sV56SywJKkFsg0TH6Dbfxrt9uDrSERSXzf/OkZ/uuPTtO+8+MsyZjfgfQNX3+FjBQLhz//B/N63Xgy3e9czUQXkbh29foY5iTTvI+BANy9JE1dWNNQgIhIXLtyfYzctAWY52kdrJuVLkmnd2ic/hsD+TKVAkRE4trV62Pz3nUVULok8CSWWiG3owARkbh2dTAOAuSKBtJvRwEiInHt6vUx8mMUIGX56ZhMcOrS9ZhcP94pQEQkbg2OeRge98asBZK6wMxduWmc7HHH5PrxTgEiInHr6vXJZUyWpMcmQABWFGZy6pJ7Vks2JSoFiIjErWCAZKTErA73LM3ANTzBlRt1kd9RgIhI3LpyfRSA/MzYtUDuKcwEoEPdWLdQgIhI3IqPLqwMAE71aCD9gxQgIhK3fteFFbsAKc5OJWOhhVOX1AL5IAWIiMStq9fHWLTATNpCS8zqYDKZuKcwQy2Q21CAiEjcuhzDOSA3sxdmcvbqIKMT3lhXJa4oQEQkbvX0j1CYlRrranCvNRuvz89vLgzEuipxRQEiInGrZ2CUwuzYPcIbcJ8tG4Dj3f0xrUe8UYCISFxyj04wOOahKA5aIHfnpZGRYuFXTrVAbqYAEZG41NM/OQckHlogSUkm7rNmc7zbFeuqxBUFiIjEpYsDIwBx0QIBuM+WRXffCL2DmpEeoAARkbgUTy0QgPus2QD8ytkf03rEk7AC5NixY1RVVVFWVobD4cDrvfVRtkOHDlFRUUFpaSk7d+4Mvu92u9m0aRPl5eVUV1dz8uTJ4LHNmzezZMkSysrKonArIpJIem60QOLhKSyA+0tyAGg/p26sgJAB4vP5cDgcHD58mLNnz+J2uzl48OCUMgMDA2zfvp22tjY6OztpbW2lra0NgD179rBq1SrOnDnD7t272bZtW/C8z3/+8/zwhz+M8i2JSCK42D9K2gIzmSmxm0R4syUZCynLT+eN93pjXZW4ETJA2tvbKSoqwm63A1BfX09zc/OUMkePHmXdunUUFhZisVioq6sLlmlubsbhcACwceNGTp8+TW/v5B/ARz/6URYvXhzVGxKRxNAzMEJhdiom0/zvhf5hHr47l19fGGBwzBPrqsSFkAHidDqx2WzBn0tKSuju7g67zAePWa1WnE5n2BVsbGzEbrcHXy6Xmo8idwKna4Ti7Pjovgr4/btz8fr8/PJcX6yrEhdCBkg4m6jM5UYrDQ0NdHR0BF85OTlzdi0RiQ8TXh8X+kdYlrso1lWZ4iN3T/aYvPG+urEgjACx2WxTWhxdXV1Yrdawy1it1inHnE4nxcXFs664iCSuC64RvD4/JYvjK0Dy0hdSUaBxkICQAVJdXY3T6aSjowOA/fv3U1NTM6XMhg0baG1tpaenB4/Hw4EDB4JlampqaGpqAuDIkSOUlZWRl5cX7fsQkQRyvm8YgOW5aTGuya3WlC/h1xcGgkvN38lCBojZbKapqYna2lpKS0tJT09n69attLS0BAfHs7Ky2Lt3L2vWrKGyspK1a9eyfv16AHbs2MGJEycoLy9n165d7Nu3L/jZjz32GA8//DDnzp3DarXy1a9+dY5u
U0SMpKt3CCDuurAAProiH78fWjuvxLoqMWfyG2yneLvdHmwNiUhi+sr3Omh69bec+rsNpCSbY12dKSa8Pn7v717mD0pzeW5rdayrM+em+52rmegiEnfO9Q5TmJUSd+EBkGxOYm3FEn525hpjnjt7fxAFiIjEnfevDnJXXvyNfwR8fEUBw+NeXjt7LdZViSkFiIjEldEJL+d6h6goyIh1VT7Ux1bkk5KcxEvvXIx1VWJKASIiceX9q0P4/MR1gGSkJPMJ+1J+1HHpjp6VrgARkbhy+vJ1ACoK0mNck+nV3F/M6ISPo7+5FOuqxIwCRETiSiBAyuO4BQLwaHkeeekLONTeHbpwglKAiEhc6bx0nYLMhWSlJse6KtNKNifxxEMl/OJcH7+5cGdudasAEZG44ff7+ZWzn1XF2bGuSlj+9PeXkWw28fxrv411VWJCASIiccPpGuHa4Dj3l2THuiphKchM4d/fW8R3f3UxuAHWnUQBIiJx43h3PwCrbdkxrcdMfH5tKR6fn6+/fCbWVZl3ChARiRvHu/sxmWCVNSvWVQlb5dIMPn1/MYff6ubsleuxrs68UoCISNz4+fu9VBZkkJkS3wPoH/QfP1GBJSmJ/9Ty7pzujxRvFCAiEheuXh/j3Ytu1lQsiXVVZsyas4gvfbyc18728n/uoMd6FSAiEhdePXsVmNxvw4ieWXM3VUWZfOV7HcG5LIlOASIiceH7Jy6RkWKherkxt61ONifxjSfuJynJxNP/85e4hsZjXaU5pwARkZjrHx6n7fQV/t3KpXG5hHu4Spek840n7qe7b5gtTW8mfIgoQEQk5v5vezcTXj+fvt8a66rM2vrKfP7bZ1dz6pKbzzz3BmevDMa6SnNGASIiMTU87mH/q79lZXEmv3/34lhXJyoeX13Mf//TB+jpH+Hxb77Ki2+ex+tLvKezwgqQY8eOUVVVRVlZGQ6HA6/31l24Dh06REVFBaWlpezcuTP4vtvtZtOmTZSXl1NdXc3JkydDniMid46v//gMV66P8eWPVWAymWJdnaj546qlfOcLj7IsN42d3/4Nn/zmqxz5dU9CBUnIAPH5fDgcDg4fPszZs2dxu90cPHhwSpmBgQG2b99OW1sbnZ2dtLa20tbWBsCePXtYtWoVZ86cYffu3Wzbti3kOSJyZ/jWW07++ZX32VC1lI/bC2Jdnagry0+n5QuP8Leb7Fx2j7Htxbf5g//yE/625V1eO3uNIYPvJWIJVaC9vZ2ioiLsdjsA9fX1NDY2UldXFyxz9OhR1q1bR2FhIQB1dXU0Nzezdu1ampub+f73vw/Axo0befrpp+nt7eXHP/7xh54jIolrzOPlNxcGOPjzLr79zgWqijLZ+5l7Y12tOWMxJ/Hnj9zF5x4q4TvHL/Dtdy5w4I1z/Nvr5zAnmagoyKAsP527chdhW7yI3PQFLE5bSG7aAjJTk0lJTmKBOSkuW2chA8TpdGKz2YI/l5SU0N3dHbLM0aNHb3vMarXidDqnPWeufOMnZ3jl9NUPPR5OwzLULNNQnxHOJNWQRWZZh3Dq4Q/xKWHdR8hrhDp/9k392d5neJ8R6vwwrjHLAvHzd3f6Qh6vn8vuUXx+SDLBEw+VsOuxFaQtDPmryPBSks189sESPvtgCVeuj/Lz9/t461wfv3IO8Mrpq3x3ZOJDzzWZIDXZTEqymRRLEklJJpJMJsxJJkwmMAf/24Q5CZJMk/9tAhZYkjj0Hx6ek3sK+acW1l/+CP5HD/ecxsZGGhsbgz+7XK4ZXytgwutjzOObtkw4IR+ySIgPCXW+KdRH3PiLMX0VQt9I6M8Idf7sv6zZ1iGceoS8j3m4RjhC/ZlF57ua2zqEqofJZOKPq5ZSUZDBx1bkU5CZEsYnJp78jBQ+eV8Rn7yvKPiea2gcp2uE3qEx+obG6Rsaxz0ywajHx+iE98Zr8r99fvD5/Xh9fnz+Gy8feP1+/MH3J/9R4JvDMZeQAWKz2aa0OLq6urBarbeU
OXHixG3LWK1Wuru7ufvuu4HJFklxcfG059ysoaGBhoaG4M+BrrRIPPtHlTz7R5URny8iMldy0haQk7Yg1tWYkZCD6NXV1TidTjo6OgDYv38/NTU1U8ps2LCB1tZWenp68Hg8HDhwIFimpqaGpqYmAI4cOUJZWRl5eXnTniMiIvEvZAvEbDbT1NREbW0tY2NjrF27lq1bt9LS0kJLSwtNTU1kZWWxd+9e1qxZg8/nY/Pmzaxfvx6AHTt2sGXLFsrLy8nIyOCFF14AmPYcERGJfya/wdYettvtwdaQiIjMrel+52omuoiIREQBIiIiEVGAiIhIRBQgIiISEcMNomdmZt52vki8crlc5OQYc4OcWNF3NnP6zmZO31l4nE4nbrf7tscMFyBGo6fGZk7f2czpO5s5fWezpy4sERGJiAJEREQiogCZYzev4yXh0Xc2c/rOZk7f2expDERERCKiFoiIiEREASIiIhFRgMyB7u5uPvaxj7FixQqqqqr4m7/5m1hXyVAaGhqwWBJ/h7poGRoaoq6ujsrKSu655x6ee+65WFcp7h08eJB7772X1atX84d/+Id0dnbGukqGpACZAxaLhb//+7/n5MmTvPPOO7z66qt85zvfiXW1DOFnP/sZg4ODsa6GoTz77LNUVVXR2dnJyZMn+fSnPx3rKsW14eFhvvSlL/HTn/6U48ePs2XLFnbt2hXrahmS/pk3BwoLCyksLARgwYIF3H///XR1dcW4VvFvbGyMv/7rv+all17ixRdfjHV1DOH69eu0tLQE/36ZTCby8/NjXKv45vP58Pv9DA4OkpeXx8DAQPD/V5kZBcgc6+vr46WXXuJHP/pRrKsS93bv3k19fT1LliyJdVUM4/3336egoIAvfOEL/OIXv2DZsmV8/etfZ9myZbGuWtxKT0/nm9/8JitXriQrK4usrCzeeOONWFfLkNSFNYfGx8epra3lS1/6Evfcc0+sqxPXTpw4wZtvvsmTTz4Z66oYisfj4fjx49TW1vL222+zadMmnnrqqVhXK65NTEywb98+2tvbuXDhArW1tfzVX/1VrKtlSAqQOeL1evmTP/kTVq9ezbPPPhvr6sS91157jY6ODu666y6WL1+O1+tl+fLlH7qIm0yyWq3k5uby8Y9/HIDPfe5zvPXWWzGuVXw7fvw4fr+fFStWAJPf2euvvx7jWhmTAmSOPPPMM2RkZPAP//APsa6KIfzFX/wFFy9e5Ny5c5w7dw6z2cy5c+fIzMyMddXiWkFBAVVVVbz99tsAvPzyy1RVVcW4VvHNarXS2dnJhQsXgMnvzG63x7hWxqSZ6HPgtdde49FHH2XlypWYzWYAnnrqKb74xS/GuGbGYbFY8Hg8sa6GIXR0dFBfX8/Q0BDZ2dk899xzwX9dy+01NTXxta99jeTkZJYsWcLzzz9PSUlJrKtlOAoQERGJiLqwREQkIgoQERGJiAJEREQiogAREZGIKEBERCQiChAREYmIAkRERCKiABERkYj8f5Cn4Sglf2Z3AAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 388;\n", + " var nbb_unformatted_code = \"posterior.plot()\";\n", + " var nbb_formatted_code = \"posterior.plot()\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "posterior.plot()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Make a Prediction" + ] + }, + { + "cell_type": "code", + "execution_count": 357, + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 357;\n", + " var nbb_unformatted_code = \"n_pred = words_per_day\\nps_pred = binom.sf(0, n_pred, ps)\";\n", + " var nbb_formatted_code = \"n_pred = words_per_day\\nps_pred = binom.sf(0, n_pred, ps)\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "n_pred = words_per_day\n", + "ps_pred = binom.sf(0, n_pred, ps)" + ] + }, + { + "cell_type": "code", + "execution_count": 358, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(0.00016066773432600893, 6224.0250302584855)" + ] + }, 
+ "execution_count": 358, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "application/javascript": [ + "\n", + " setTimeout(function() {\n", + " var nbb_cell_id = 358;\n", + " var nbb_unformatted_code = \"p = np.sum(posterior * ps_pred)\\np, 1 / p\";\n", + " var nbb_formatted_code = \"p = np.sum(posterior * ps_pred)\\np, 1 / p\";\n", + " var nbb_cells = Jupyter.notebook.get_cells();\n", + " for (var i = 0; i < nbb_cells.length; ++i) {\n", + " if (nbb_cells[i].input_prompt_number == nbb_cell_id) {\n", + " if (nbb_cells[i].get_text() == nbb_unformatted_code) {\n", + " nbb_cells[i].set_text(nbb_formatted_code);\n", + " }\n", + " break;\n", + " }\n", + " }\n", + " }, 500);\n", + " " + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "p = np.sum(posterior * ps_pred)\n", + "p, 1 / p" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Copyright 2024 Allen B. Downey\n", + "\n", + "License: [Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0)](https://creativecommons.org/licenses/by-nc-sa/4.0/)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}