From 20fcb1ab1d32cb53694c95fd4f7fa4df8b952614 Mon Sep 17 00:00:00 2001
From: Sylvia Hang Nguyen <49593977+sylviahangnguyen@users.noreply.github.com>
Date: Fri, 25 Jun 2021 18:57:06 +0100
Subject: [PATCH] Add files
---
NER_eng_BiLSTM.ipynb | 2116 +++++++++++++++++++++++++++++++++++++++
NER_trivia_BiLSTM.ipynb | 2104 ++++++++++++++++++++++++++++++++++++++
2 files changed, 4220 insertions(+)
create mode 100644 NER_eng_BiLSTM.ipynb
create mode 100644 NER_trivia_BiLSTM.ipynb
diff --git a/NER_eng_BiLSTM.ipynb b/NER_eng_BiLSTM.ipynb
new file mode 100644
index 0000000..e6bf79a
--- /dev/null
+++ b/NER_eng_BiLSTM.ipynb
@@ -0,0 +1,2116 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "NER_eng_BiLSTM.ipynb",
+ "provenance": [],
+ "collapsed_sections": [
+ "YsSB3CDt2NxF",
+ "WP5M3GnHxY-9",
+ "MRofcZfAUd9g",
+ "4p6nLSAePE9U",
+ "Hm9UnKBqUeZA",
+ "vfswr-bAXwl5"
+ ],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YsSB3CDt2NxF"
+ },
+ "source": [
+ "## Outline\n",
+ "- [Introduction](#0)\n",
+ " - [Import libraries](#0.1)\n",
+ "- [Part 1: Explore the data](#1)\n",
+ " - [1.1 Import the datasets](#1.1)\n",
+ " - [1.2 Exploratory Analysis](#1.2)\n",
+ " - [Conclusion after analysis](#1.3)\n",
+ " \n",
+ "- [Part 2: Pre-process the data](#2)\n",
+ " - [2.1 Stemming](#2.1)\n",
+ " - [2.2 Lemmatization](#2.2)\n",
+ " - [2.3 Replacement](#2.3)\n",
+ " - [2.4 Pre-processing pipeline](#2.4)\n",
+ " - [2.5 Split to train/val datasets](#2.5)\n",
+ " - [2.6 Tokenization and Padding](#2.6)\n",
+ " - [2.7 Check the Imbalance in train dataset](#2.7)\n",
+ " - [2.8 One-hot encoding](#2.8)\n",
+ "\n",
+ "- [Part 3: Build the model](#3)\n",
+ " - [3.1 Glove Embedding](#3.1)\n",
+ " - [3.2 Define the model](#3.2)\n",
+ " - [3.3 Callbacks](#3.3)\n",
+ " \n",
+ "\n",
+ "- [Part 4: Train the model](#4)\n",
+ "- [Part 5: Test the model](#5)\n",
+ "- [Part 6: Test with your own sentence](#6)\n",
+ "\n",
+ "- [Part 7: Analyse the incorrect predictions](#7)\n",
+ " - [Potential improvement](#7.1)\n",
+ "\n",
+ "- [Export result to .tsv file](#8)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Uj9t5Iav2nzR"
+ },
+ "source": [
+ "\n",
+ "# Introduction\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "pZDTBRDfi6u7"
+ },
+ "source": [
+ "\n",
+ "## Import libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Ntqkdg4N3HW4",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "026c8545-ec38-4d57-9ca9-e7d508510898"
+ },
+ "source": [
+ "!python --version\n",
+ "import os\n",
+ "\n",
+ "%tensorflow_version 2.x\n",
+ "import tensorflow as tf\n",
+ "print(tf.__version__)\n",
+ "\n",
+ "# build the tokenized sentences and tags\n",
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+ "\n",
+ "from tensorflow.keras.utils import to_categorical\n",
+ "from tensorflow.keras.initializers import Constant\n",
+ "from tensorflow.keras import Model\n",
+ "from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, \\\n",
+ "TimeDistributed, Dense, Dropout\n",
+ "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping\n",
+ "\n",
+ "import numpy as np # linear algebra\n",
+ "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "import seaborn as sns\n",
+ "from collections import Counter\n",
+ "import random as rnd\n",
+ "from nltk.corpus import stopwords\n",
+ "import nltk\n",
+ "nltk.download('stopwords')\n",
+ "from nltk.stem import WordNetLemmatizer \n",
+ "from nltk.stem import PorterStemmer\n",
+ "\n",
+ "!pip install sklearn_crfsuite\n",
+ "from sklearn_crfsuite.metrics import flat_classification_report\n",
+ "!pip install seqeval\n",
+ "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
+ "import csv\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Python 3.7.10\n",
+ "2.5.0\n",
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/stopwords.zip.\n",
+ "Collecting sklearn_crfsuite\n",
+ " Downloading https://files.pythonhosted.org/packages/25/74/5b7befa513482e6dee1f3dd68171a6c9dfc14c0eaa00f885ffeba54fe9b0/sklearn_crfsuite-0.3.6-py2.py3-none-any.whl\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (1.15.0)\n",
+ "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (0.8.9)\n",
+ "Collecting python-crfsuite>=0.8.3\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/47/58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429/python_crfsuite-0.9.7-cp37-cp37m-manylinux1_x86_64.whl (743kB)\n",
+ "\u001b[K |████████████████████████████████| 747kB 12.5MB/s \n",
+ "\u001b[?25hRequirement already satisfied: tqdm>=2.0 in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (4.41.1)\n",
+ "Installing collected packages: python-crfsuite, sklearn-crfsuite\n",
+ "Successfully installed python-crfsuite-0.9.7 sklearn-crfsuite-0.3.6\n",
+ "Collecting seqeval\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)\n",
+ "\u001b[K |████████████████████████████████| 51kB 5.0MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.19.5)\n",
+ "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n",
+ "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n",
+ "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.0.1)\n",
+ "Building wheels for collected packages: seqeval\n",
+ " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16184 sha256=ae07dcf27893686fb86ad23dc99961e6c43db9b655a080d92441256538067ba7\n",
+ " Stored in directory: /root/.cache/pip/wheels/52/df/1b/45d75646c37428f7e626214704a0e35bd3cfc32eda37e59e5f\n",
+ "Successfully built seqeval\n",
+ "Installing collected packages: seqeval\n",
+ "Successfully installed seqeval-1.2.2\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "T58oiLSU25Za"
+ },
+ "source": [
+ "\n",
+ "# Part 1: Explore the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WP5M3GnHxY-9"
+ },
+ "source": [
+ "\n",
+ "## 1.1 Import the datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "iHhxVylamxCG",
+ "outputId": "50607de3-273f-432a-e13b-af3344732e19"
+ },
+ "source": [
+ "# Create the data and model directories (absolute paths under /, so the runtime needs write access to the filesystem root)\n",
+ "!mkdir -p /data/eng\n",
+ "!mkdir -p /data/trivia10k13\n",
+ "!mkdir -p /model\n",
+ "# Download the train/test .bio files of the eng and trivia10k13 movie corpora and store them under .tsv names\n",
+ "# NOTE(review): --no-check-certificate makes wget skip TLS certificate validation - confirm this is still required\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/engtrain.bio \\\n",
+ "-O /data/eng/train.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/engtest.bio \\\n",
+ "-O /data/eng/test.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13train.bio \\\n",
+ "-O /data/trivia10k13/train.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13test.bio \\\n",
+ "-O /data/trivia10k13/test.tsv"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "--2021-06-03 08:19:10-- https://groups.csail.mit.edu/sls/downloads/movie/engtrain.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1013492 (990K)\n",
+ "Saving to: ‘/data/eng/train.tsv’\n",
+ "\n",
+ "/data/eng/train.tsv 100%[===================>] 989.74K 1.85MB/s in 0.5s \n",
+ "\n",
+ "2021-06-03 08:19:11 (1.85 MB/s) - ‘/data/eng/train.tsv’ saved [1013492/1013492]\n",
+ "\n",
+ "--2021-06-03 08:19:11-- https://groups.csail.mit.edu/sls/downloads/movie/engtest.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 252636 (247K)\n",
+ "Saving to: ‘/data/eng/test.tsv’\n",
+ "\n",
+ "/data/eng/test.tsv 100%[===================>] 246.71K 714KB/s in 0.3s \n",
+ "\n",
+ "2021-06-03 08:19:12 (714 KB/s) - ‘/data/eng/test.tsv’ saved [252636/252636]\n",
+ "\n",
+ "--2021-06-03 08:19:12-- https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13train.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1785558 (1.7M)\n",
+ "Saving to: ‘/data/trivia10k13/train.tsv’\n",
+ "\n",
+ "/data/trivia10k13/t 100%[===================>] 1.70M 2.61MB/s in 0.7s \n",
+ "\n",
+ "2021-06-03 08:19:13 (2.61 MB/s) - ‘/data/trivia10k13/train.tsv’ saved [1785558/1785558]\n",
+ "\n",
+ "--2021-06-03 08:19:13-- https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13test.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 438729 (428K)\n",
+ "Saving to: ‘/data/trivia10k13/test.tsv’\n",
+ "\n",
+ "/data/trivia10k13/t 100%[===================>] 428.45K 995KB/s in 0.4s \n",
+ "\n",
+ "2021-06-03 08:19:14 (995 KB/s) - ‘/data/trivia10k13/test.tsv’ saved [438729/438729]\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-D8_xEVDN-dj",
+ "outputId": "fe057421-cf99-47e0-cdae-d56f342434b9"
+ },
+ "source": [
+ "def get_sentence(file_path):\n",
+ "    '''\n",
+ "    Read a tsv file with one tag<TAB>word pair per line, where sentences are\n",
+ "    separated by blank lines, and rebuild each sentence and its tag sequence.\n",
+ "    Input:\n",
+ "        file_path - path to the tsv file\n",
+ "    Output:\n",
+ "        sentences - list of sentences in string format (words joined by ' ')\n",
+ "        tags - list of associated tags in string format (tags joined by ' ')\n",
+ "    Blocks without any token line (e.g. the one left after the final blank\n",
+ "    line of the file) are skipped instead of yielding an empty sentence.\n",
+ "    '''\n",
+ "    sentences, tags = [], []\n",
+ "    with open(file_path, encoding=\"utf-8\") as f:\n",
+ "        contents = f.read()\n",
+ "    for sen_tag in contents.split(\"\\n\\n\"):\n",
+ "        # Ignore empty lines so runs of 3+ newlines do not break the parsing.\n",
+ "        pairs = [line.split(\"\\t\") for line in sen_tag.split(\"\\n\") if line]\n",
+ "        if not pairs:\n",
+ "            continue\n",
+ "        sentences.append(' '.join(pair[1] for pair in pairs))\n",
+ "        tags.append(' '.join(pair[0] for pair in pairs))\n",
+ "    return sentences, tags\n",
+ "\n",
+ "\n",
+ "train_path = \"/data/eng/train.tsv\"  # written by the download cell (wget -O)\n",
+ "test_path = \"/data/eng/test.tsv\"  # written by the download cell (wget -O)\n",
+ "# Parse each split into two parallel lists: sentence strings and tag strings\n",
+ "sentences, tags = get_sentence(train_path)\n",
+ "test_sentences, test_tags = get_sentence(test_path)\n",
+ "# Report the number of sentences parsed from each split\n",
+ "print(\"The train dataset has {} sentences.\".format(len(sentences)))\n",
+ "print(\"The test dataset has {} sentences.\".format(len(test_sentences)))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "The train dataset has 9776 sentences.\n",
+ "The test dataset has 2444 sentences.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SJyYqFkQNCoW"
+ },
+ "source": [
+ "\n",
+ "## 1.2 Exploratory Analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 198
+ },
+ "id": "Q6KzO6mq8326",
+ "outputId": "282abcf1-586b-4c59-87ce-dddcd4c0b66d"
+ },
+ "source": [
+ "# Preview the training file as a flat table: one row per token (read_csv skips the blank sentence-separator lines)\n",
+ "df = pd.read_csv(train_path, sep=\"\\t\", header=None, names=[\"Tag\", \"Word\"])\n",
+ "df.head()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Tag | \n",
+ " Word | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " O | \n",
+ " what | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " O | \n",
+ " movies | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " O | \n",
+ " star | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " B-ACTOR | \n",
+ " bruce | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " I-ACTOR | \n",
+ " willis | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Tag Word\n",
+ "0 O what\n",
+ "1 O movies\n",
+ "2 O star\n",
+ "3 B-ACTOR bruce\n",
+ "4 I-ACTOR willis"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tBVUmUQtFRJ1"
+ },
+ "source": [
+ "\n",
+ "### 1.2.1 Sentence Length \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 584
+ },
+ "id": "h2J38zVa-wkD",
+ "outputId": "a8c282e7-c675-42e9-a23a-461f0b386d0b"
+ },
+ "source": [
+ "plt.style.use(\"dark_background\")  # global matplotlib setting: affects every figure drawn after this cell runs\n",
+ "\n",
+ "# How long are the sentences?\n",
+ "def plot_sentence_length_histogram(list_sentences):\n",
+ "    '''\n",
+ "    Print summary statistics of the sentence lengths and plot their histogram.\n",
+ "    Input:\n",
+ "        list_sentences - a list of sentences (strings of whitespace-separated tokens)\n",
+ "    Output:\n",
+ "        [print] - Min, Max, Median and Average value of sentence length\n",
+ "        [plot] - Histogram plot of sentence length\n",
+ "    '''\n",
+ "    # split() with no argument yields 0 tokens for an empty sentence (split(' ') yields 1)\n",
+ "    lengths = [len(sen.split()) for sen in list_sentences]\n",
+ "    fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "    sns.histplot(lengths, ax=ax)\n",
+ "    plt.xlabel(\"Number of tokens in a sentence\")\n",
+ "    plt.ylabel(\"Number of occurrences\")\n",
+ "    print(\"Min: \",np.min(lengths))\n",
+ "    print(\"Max: \",np.max(lengths))\n",
+ "    print(\"Median: \",np.median(lengths))\n",
+ "    print(\"Average: \",round(np.mean(lengths),2))\n",
+ "\n",
+ "plot_sentence_length_histogram(sentences)  # sentences parsed from the training file"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Min: 1\n",
+ "Max: 47\n",
+ "Median: 9.0\n",
+ "Average: 10.18\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qY8VCLDQhCbJ"
+ },
+ "source": [
+ "### 1.2.2 Entity Length"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 584
+ },
+ "id": "XQrFzlrkhCm3",
+ "outputId": "310ea49d-586a-4ce8-b009-2aaf519a4c9d"
+ },
+ "source": [
+ "# How long are the entities?\n",
+ "def plot_entity_length_histogram(series):\n",
+ "    '''\n",
+ "    Print summary statistics of the entity lengths and plot their histogram.\n",
+ "    An entity is a B-tagged token plus the I-tagged tokens counted after it.\n",
+ "    Input:\n",
+ "        series - a pandas series of the tags\n",
+ "    Output:\n",
+ "        [print] - Min, Max, Median and Average value of entity length\n",
+ "        [plot] - Histogram plot of entity length\n",
+ "    '''\n",
+ "    tag_length, current_length = [], 0\n",
+ "    for tag in series:\n",
+ "        if tag.startswith(\"B\"):\n",
+ "            tag_length.append(current_length)\n",
+ "            current_length = 1\n",
+ "        elif tag.startswith(\"I\"):\n",
+ "            current_length += 1\n",
+ "    # No B tag follows the last entity, so its length is appended here; the first\n",
+ "    # stored value only covers tokens seen before the first B tag and is dropped.\n",
+ "    tag_length = (tag_length + [current_length])[1:]\n",
+ "    fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "    sns.histplot(tag_length, ax=ax)\n",
+ "    plt.xlabel(\"Number of tokens in a tag\")\n",
+ "    plt.ylabel(\"Number of occurrences\")\n",
+ "    print(\"Min: \",np.min(tag_length))\n",
+ "    print(\"Max: \",np.max(tag_length))\n",
+ "    print(\"Median: \",np.median(tag_length))\n",
+ "    print(\"Average: \",round(np.mean(tag_length),2))\n",
+ "\n",
+ "plot_entity_length_histogram(df[\"Tag\"])  # Tag column of the flat training table"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Min: 1\n",
+ "Max: 16\n",
+ "Median: 2.0\n",
+ "Average: 1.81\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OcoUQIfwfOAe"
+ },
+ "source": [
+ "### 1.2.3 Token frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 567
+ },
+ "id": "b2kvMJJ3KvXu",
+ "outputId": "757556df-681a-4624-c19b-94bf69835f53"
+ },
+ "source": [
+ "def plot_top_non_stopwords_barchart(series, top=20, word=True):\n",
+ "    '''\n",
+ "    Count the values of a series and plot the most frequent non-stopword ones.\n",
+ "    Input:\n",
+ "        series - a pd.Series of words or tags\n",
+ "        top - number of most common words to plot\n",
+ "        word - True if series holds words, False if it holds tags (only changes the printed/plotted wording)\n",
+ "    Output:\n",
+ "        [print] - No of distinct words in train dataset, followed by the full count dict\n",
+ "        [plot] - Barchart of most common words' occurrence\n",
+ "        [return] - dict mapping each distinct value to its number of occurrences\n",
+ "    '''\n",
+ "    stop = set(stopwords.words('english'))\n",
+ "    value = 'words' if word else 'tags'\n",
+ "    counter = Counter(list(series))\n",
+ "    print(\"There are {} distinct {} in dataset\".format(len(counter), value))\n",
+ "    print(dict(counter))\n",
+ "    x, y = [], []\n",
+ "    for item, count in counter.most_common():\n",
+ "        if item not in stop:\n",
+ "            x.append(count)\n",
+ "            y.append(item)\n",
+ "            if len(x) == top:\n",
+ "                break\n",
+ "    fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "    sns.barplot(x=x, y=y, ax=ax)\n",
+ "    plt.xlabel(\"Number of {} occurrences in a sentence\".format(value))\n",
+ "    # value is already plural, so no extra 's' is appended (the label used to read \"Most common wordss\")\n",
+ "    plt.ylabel(\"Most common {}\".format(value))\n",
+ "    return dict(counter)\n",
+ "\n",
+ "word_counter = plot_top_non_stopwords_barchart(df[\"Word\"], top=40)  # returned dict: word -> occurrence count in the training table"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "There are 6710 distinct words in dataset\n",
+ "{'what': 2938, 'movies': 1702, 'star': 303, 'bruce': 37, 'willis': 24, 'show': 588, 'me': 958, 'films': 510, 'with': 1302, 'drew': 12, 'barrymore': 8, 'from': 895, 'the': 5331, '1980s': 74, 'starred': 245, 'both': 8, 'al': 23, 'pacino': 17, 'and': 777, 'robert': 86, 'deniro': 11, 'find': 810, 'all': 278, 'of': 1414, 'that': 1640, 'harold': 3, 'ramis': 2, 'bill': 33, 'murray': 14, 'a': 4224, 'movie': 3533, 'quote': 53, 'about': 1182, 'baseball': 11, 'in': 2789, 'it': 197, 'have': 335, 'mississippi': 3, 'title': 133, 'science': 163, 'fiction': 162, 'directed': 1122, 'by': 940, 'steven': 50, 'spielberg': 34, 'do': 231, 'you': 413, 'any': 738, 'thrillers': 21, 'sofia': 10, 'coppola': 26, 'leonard': 8, 'cohen': 4, 'songs': 18, 'been': 178, 'used': 16, 'elvis': 10, 'set': 39, 'hawaii': 2, 'is': 2214, 'references': 1, 'zydrate': 1, 'are': 765, 'there': 1062, 'musical': 107, 'patrick': 23, 'dempsey': 1, 'list': 795, 'westerns': 21, 'starring': 759, 'john': 155, 'wayne': 22, 'military': 56, 'related': 4, 'demi': 5, 'moore': 19, 'did': 770, 'stephen': 12, 'made': 300, 'sex': 6, 'or': 42, 'horror': 242, 'must': 90, 'be': 76, 'to': 413, 'watch': 53, 'ang': 2, 'lee': 56, 'direct': 268, 'costume': 2, 'drama': 199, '1800s': 1, 'who': 599, 'first': 100, 'james': 129, 'bond': 37, 'r': 653, 'rated': 1546, 'best': 147, 'picture': 24, 'oscar': 28, 'winners': 3, 'where': 213, 'can': 187, 'i': 433, 'buy': 10, 'sucker': 1, 'punch': 2, 'soundtrack': 64, 'british': 8, 'came': 189, 'out': 384, '1990s': 85, 'royal': 1, 'family': 97, 'camerons': 3, 'titanic': 21, 'was': 1559, 'last': 562, 'judy': 13, 'garland': 11, 'spoof': 1, 'charlie': 20, 'sheen': 10, 'id': 69, 'like': 174, 'documentary': 150, 'doctors': 1, 'chicago': 3, 'hospital': 3, 'action': 270, 'woo': 2, 'romantic': 129, 'film': 1201, 'protagonist': 1, 'doesnt': 4, 'fall': 2, 'love': 68, 'does': 66, 'original': 14, 'little': 23, 'mermaid': 4, 'hans': 5, 'christian': 23, 'anderson': 21, 'come': 127, 'on': 264, 'dvd': 8, 'disney': 
48, 'beyonce': 1, 'sang': 7, 'shawn': 5, 'levy': 7, '2010s': 2, 'new': 46, 'david': 85, 'dhawan': 1, 'hannibal': 2, 'lector': 2, 'childrens': 15, 'billy': 39, 'crystal': 8, 'reviewed': 11, 'rock': 15, 'ballads': 1, 'were': 235, 'free': 1, 'willy': 9, 'filmed': 26, 'jennifer': 23, 'aniston': 9, 'main': 36, 'character': 99, 'wars': 32, 'im': 139, 'looking': 254, 'for': 612, 'song': 168, 'day': 46, 'has': 561, 'shortest': 1, 'trailer': 90, 'will': 70, 'always': 8, 'want': 121, 'see': 188, 'clint': 49, 'eastwood': 47, 'comedy': 322, 'whitney': 6, 'houstons': 1, 'u': 2, 'had': 149, 'fast': 9, 'furious': 8, 'jack': 53, 'nicholson': 11, 'an': 657, 'insane': 5, 'asylum': 1, 'dramatic': 10, 'donald': 6, 'sutherland': 9, 'rocky': 14, 'aaron': 4, 'sorkin': 1, 'trent': 1, 'reznor': 1, 'michael': 105, 'jackson': 32, '1970s': 38, 'political': 16, 'alec': 7, 'baldwin': 4, 'keaton': 13, 'play': 81, 'batman': 12, 'ewan': 7, 'macgregor': 3, 'singing': 3, 'superman': 11, 'romance': 110, 'popular': 117, '2002': 6, 'kevin': 45, 'smith': 42, 'jason': 51, 'mews': 1, 'suicide': 5, 'painless': 2, 'their': 7, 'world': 40, 'warcraft': 2, 'wes': 15, 'schwartzman': 1, 'savannah': 1, 'smiles': 1, 'owen': 4, 'wilson': 6, 'considered': 41, 'which': 345, 'features': 45, 'blaze': 1, 'glory': 4, 'maker': 1, 'planning': 2, 'produce': 2, 'farely': 1, 'brothers': 19, 'move': 21, 'frank': 36, 'sinatra': 3, 'something': 6, 'one': 44, 'armed': 2, 'man': 41, 'rick': 9, 'springfield': 2, 'steve': 41, 'martin': 52, 'cameron': 43, 'just': 24, 'two': 128, 'stars': 465, 'less': 4, 'angelina': 18, 'jolie': 16, 'released': 347, 'most': 62, '4': 6, 'winning': 22, '1998': 6, 'roadtrip': 1, 'ben': 15, 'stiller': 8, 'how': 190, 'many': 209, 'based': 79, 'texas': 7, 'chainsaw': 3, 'massacre': 4, 'jimmy': 13, 'stewart': 25, 'ever': 204, 'actor': 166, 'called': 82, 'matchstick': 2, 'men': 14, 'good': 542, 'nc': 151, '17': 152, 'not': 23, 'princess': 14, 'them': 57, 'comedies': 64, 'brad': 44, 'pitt': 33, '2000s': 35, 
'godfather': 15, 'featured': 47, 'dude': 2, 'g': 240, 'war': 177, 'ii': 12, 'please': 95, 'locate': 7, 'george': 63, 'lucas': 27, '1996': 4, 'actors': 25, 'cameo': 4, 'spongebob': 1, 'year': 380, 'social': 4, 'network': 2, 'plot': 161, '1': 4, '1990': 169, '1991': 3, 'garry': 3, 'marshall': 15, '2011': 35, 'natalie': 9, 'portman': 2, 'dramas': 37, 'latest': 20, 'williams': 33, 'time': 39, 'car': 19, 'chases': 3, 'worst': 9, 'scary': 49, '2010': 184, 'thriller': 154, 'art': 2, 'heist': 3, 'late': 5, 'earl': 2, 'jones': 41, 'at': 61, 'end': 3, 'bronx': 3, 'tale': 17, 'top': 45, 'scorsesy': 1, 'moive': 2, 'pg': 791, '13': 558, 'ghosts': 13, 'dogs': 9, 'musicals': 26, 'antonio': 8, 'banderas': 7, 'hitchcock': 26, 'color': 3, 'animals': 6, 'five': 157, 'youre': 5, 'stories': 5, 'amanda': 7, 'seyfried': 3, 'ill': 11, 'back': 31, 'foreign': 8, 'blue': 8, 'rob': 23, 'reiner': 4, '1992': 2, 'drill': 1, 'team': 8, 'give': 55, 'ice': 15, 'baby': 11, 'vanilla': 1, 'atmospheric': 1, 'bambi': 1, 'teen': 29, 'got': 80, 'operas': 1, 'selma': 3, 'blair': 4, 'cars': 10, 'dragons': 5, 'clooney': 20, 'leonardo': 27, 'dicaprio': 24, 'danny': 25, 'devito': 10, 'krasinski': 1, 'look': 5, 'up': 71, 'could': 58, 'mobster': 4, 'friday': 8, '13th': 4, 'loved': 6, 'tomb': 1, 'raider': 1, 'carrel': 1, 'remake': 6, 'dirty': 8, 'dancing': 7, 'similar': 8, 'gnomeo': 2, 'juliet': 3, 'talking': 16, 'scorsese': 13, 'documentaries': 13, 'pollution': 2, 'china': 1, 'famous': 24, 'name': 486, 'cary': 17, 'grant': 22, 'eva': 3, 'marie': 1, 'saint': 4, 'billing': 1, 'backdraft': 1, 'theme': 32, '8': 3, 'mile': 4, 'america': 4, 'fererra': 1, 'susan': 11, 'sarandon': 7, 'jenna': 1, 'elfman': 4, 'julie': 15, 'andrews': 7, 'grease': 1, 'russel': 4, 'crowe': 9, 'mathematician': 1, '2001': 16, 'played': 99, 'ron': 25, 'weasley': 1, 'harry': 37, 'potter': 31, 'groups': 2, 'robbers': 1, 'robbing': 1, 'same': 6, 'bank': 9, 'whats': 111, 'coolio': 1, 'funniest': 8, 'mobsters': 2, 'recent': 31, 'sequel': 8, 
'speed': 4, 'anthony': 28, 'minghella': 1, 'matt': 18, 'damon': 14, 'milos': 2, 'forman': 2, 'life': 26, 'together': 9, 'when': 97, 'hobbit': 4, 'woody': 24, 'allen': 33, 'other': 17, 'people': 62, 'think': 27, 'iii': 6, 'terrible': 2, 'highest': 31, 'kids': 26, 'release': 22, 'involving': 34, 'cartoons': 3, 'get': 52, 'biography': 104, 'real': 9, 'us': 5, 'president': 8, 'portrayed': 4, 'funny': 33, 'wedding': 5, 'toasts': 1, 'britney': 1, 'spears': 1, 'she': 8, 'satisfaction': 1, 'indiana': 19, 'judd': 7, 'apatow': 6, 'since': 5, 'dog': 9, 'chase': 25, 'statum': 1, 'nora': 1, 'ephron': 1, 'write': 5, 'spielbergs': 5, 'award': 15, 'well': 121, 'known': 16, 'julia': 26, 'roberts': 28, '1993': 1, '2012': 19, 'awful': 1, 'teenagers': 3, 'lost': 13, 'woods': 8, 'contained': 2, 'line': 19, 'phone': 3, 'home': 13, 'police': 28, 'pg13': 4, 'darren': 1, 'aranofsky': 1, 'reviews': 49, 'before': 19, '2000': 159, 'giraffe': 1, 'health': 7, 'care': 3, 'lists': 1, 'braddock': 1, 'includes': 5, 'your': 18, 'touch': 2, 'black': 26, 'keys': 1, 'critics': 23, 'non': 6, 'rating': 479, 'next': 12, 'coming': 40, 'courtney': 2, 'foxx': 4, 'looks': 2, 'great': 17, 'this': 106, 'shining': 61, 'boats': 3, 'beautiful': 3, 'mind': 9, 'score': 10, 'shierly': 1, 'temples': 2, 'greatest': 4, 'diaz': 8, 'named': 34, 'hugo': 1, 'high': 23, 'school': 17, 'sean': 35, 'penn': 7, 'horses': 4, 'sci': 114, 'fi': 114, 'laurence': 12, 'olivier': 10, '1940s': 6, 'bourne': 13, 'featuring': 66, 'alien': 26, 'invasion': 4, 'some': 288, 'lewis': 18, '3d': 4, 'animated': 87, 'monsters': 7, 'vs': 2, 'aliens': 11, 'appropriate': 5, 'toddlers': 1, 'somewhere': 6, 'tom': 74, 'hanks': 31, 'keanu': 10, 'reeves': 15, 'need': 20, 'quotes': 9, 'trailers': 4, 'lesbian': 2, 'concert': 2, '1973': 1, 'receive': 19, 'genre': 77, 'walk': 4, 'remember': 7, 'part': 13, 'beatles': 8, 'simon': 4, 'pegg': 1, 'uses': 11, 'abba': 1, 'ridley': 20, 'scott': 39, 'music': 18, 'luke': 7, 'my': 60, 'heart': 14, 'go': 26, 'won': 23, 
'1999': 9, 'twilight': 9, 'expected': 2, 'appear': 20, 'theatres': 1, 'four': 168, 'higher': 10, '1950s': 9, 'boxing': 8, 'hopkins': 13, 'everythings': 1, 'roses': 1, 'howard': 28, 'meryl': 8, 'streep': 9, 'mad': 13, '1969': 2, 'scarlett': 5, 'johansson': 2, 'spaceships': 2, 'candy': 5, 'helen': 8, 'hunt': 8, '1960s': 30, 'names': 10, 'books': 10, 'fifth': 2, 'freddy': 6, 'krueger': 4, 'gary': 19, 'ross': 2, 'tobey': 1, 'maguire': 1, 'peter': 44, 'than': 12, 'lord': 13, 'rings': 13, 'historical': 39, 'jeff': 10, 'daniels': 3, 'midsummer': 1, 'nights': 6, 'dream': 4, 'romatic': 2, 'johnny': 45, 'depp': 40, 'christopher': 42, 'plummer': 2, 'win': 9, 'selling': 4, 'novel': 4, 'vampires': 14, 'bay': 19, 'psychiatric': 1, 'hospitals': 1, 'pretty': 23, 'pink': 7, '1930s': 6, 'scifi': 3, 'villain': 5, 'wins': 2, 'adventure': 150, 'rolling': 2, 'stones': 2, 'lone': 2, 'ranger': 3, 'moulin': 3, 'rouge': 3, 'awards': 10, 'silent': 9, 'harpo': 1, 'marx': 1, 'scream': 6, 'skating': 2, 'roger': 12, 'mpaa': 32, 'henry': 10, 'thomas': 11, 'quentin': 14, 'tarentino': 2, 'directors': 5, 'only': 11, 'psychological': 28, 'stanly': 1, 'kubrick': 21, '1971': 2, 'ed': 4, 'norton': 12, 'recently': 14, 'babies': 2, 'around': 47, 'presley': 3, 'ma': 4, 'collection': 1, 'van': 13, 'dame': 1, 'lead': 39, 'animal': 7, 'jmes': 1, 'alfred': 25, 'suspense': 19, 'series': 18, 'spiderman': 5, 'moviews': 1, '12': 4, 'gave': 9, 'cruise': 25, 'biggs': 5, 'favorite': 14, 'things': 13, 'hilary': 2, 'duff': 1, 'tell': 88, '2004': 8, 'ad': 1, 'placement': 1, 'arnold': 14, 'schwarzenegger': 11, 'say': 9, 'mw': 1, 'nice': 1, 'enrique': 1, 'toy': 11, 'story': 31, 'robbery': 10, 'now': 14, 'director': 154, 'kung': 8, 'fu': 8, 'ashley': 4, 'olsen': 3, 'hanksmeg': 1, 'ryan': 35, 'cillian': 2, 'murphy': 16, 'role': 58, 'jonah': 7, 'hill': 10, 'suspensful': 1, 'mansell': 1, 'done': 36, 'voice': 36, 'shrek': 20, 'cant': 11, 'wait': 2, 'king': 26, 'characters': 18, '2001s': 1, 'oceans': 5, '11': 2, 'spike': 17, 
'vin': 5, 'diesel': 3, 'involve': 7, 'packed': 1, 'should': 5, 'humor': 7, 'too': 5, 'mathematicians': 1, 'three': 101, 'sylvester': 7, 'stallone': 7, 'dancers': 6, 'house': 28, 'sand': 8, 'fog': 4, 'actress': 19, 'kristen': 7, 'would': 97, 'marvel': 4, 'comics': 1, 'through': 1, 'eyes': 4, 'tomorrow': 1, 'never': 4, 'dies': 1, '2003': 1, 'wesley': 5, 'snipes': 3, 'swayze': 5, 'cinderella': 2, 'judi': 5, 'dench': 5, 'adventures': 5, 'tintin': 4, 'mermaids': 1, 'whos': 6, 'as': 127, 'chick': 8, 'flick': 49, 'lawrence': 13, 'turner': 7, 'pirates': 21, 'caribbean': 11, 'explosions': 3, 'robots': 5, 'elise': 1, 'clifton': 2, 'ward': 4, 'mockumentary': 127, 'guest': 2, 'mojave': 1, 'mon': 1, 'his': 25, 'fish': 9, 'says': 12, 'keep': 1, 'swimming': 1, '2': 32, 'years': 378, '1997': 7, 'jacksons': 2, 'mash': 9, 'clown': 4, 'posse': 2, 'mariah': 4, 'carey': 7, 'act': 29, 'jamie': 11, 'curtis': 7, 'decode': 1, 'rosie': 4, 'odonnell': 4, 'duchovnys': 1, 'edward': 18, 'jon': 11, 'bon': 2, 'jovi': 2, 'appeared': 13, 'parker': 9, 'superhero': 4, 'ghost': 7, 'rider': 5, 'la': 6, 'dolce': 2, 'vita': 2, 'wrote': 10, 'avatar': 10, 'interview': 4, 'vampire': 18, 'elizabeth': 15, 'taylor': 18, 'national': 1, 'velvet': 3, 'trying': 16, 'take': 17, 'over': 23, 'breeding': 1, 'humans': 3, 'created': 7, 'soundtrak': 1, 'nominated': 7, 'but': 19, 'havent': 1, 'going': 12, 'sin': 6, 'city': 23, 'joely': 1, 'richardson': 3, 'sharks': 6, 'lowest': 4, 'bad': 8, 'guy': 10, 'tron': 2, 'children': 42, 'ratatouille': 1, 'hunger': 17, 'games': 19, 'preview': 23, 'argonauts': 1, 'stuff': 2, 'dreams': 3, 'tim': 39, 'burtons': 1, 'newest': 15, 'ther': 1, 'gene': 24, 'wilder': 20, 'army': 16, 'darkness': 8, 'kind': 10, 'valentines': 4, 'sports': 22, '99': 1, 'basketball': 5, 'teams': 1, 'marisa': 4, 'tomei': 4, 'hattie': 1, 'mcdaniel': 1, 'avengers': 2, 'kirk': 6, 'douglas': 11, 'adam': 24, 'sandler': 21, 'horrors': 2, 'joe': 21, 'pesci': 12, 'craven': 3, 'breakfast': 1, 'club': 4, 'halle': 9, 
'berry': 8, 'obi': 1, 'wan': 1, 'kenobi': 1, 'roman': 15, 'holiday': 2, 'every': 6, 'serenity': 2, 'ages': 1, 'jaws': 11, 'bruc': 1, 'ni': 1, 'battle': 11, 'algiers': 1, 'cabin': 2, 'disaster': 33, 'sandra': 20, 'bullock': 14, 'pullman': 3, 'brendan': 3, 'fehr': 1, 'its': 28, 'highly': 148, 'recommended': 55, 'summer': 7, 'allison': 1, 'hannigan': 5, 'band': 6, 'camp': 11, 'connelly': 2, 'means': 2, 'pirate': 8, 'muir': 1, 'happy': 5, 'feet': 4, 'trek': 15, '10': 12, 'friendly': 4, 'scariest': 4, 'halloween': 6, '1982': 2, 'robin': 27, 'scores': 2, 'grace': 2, 'pixar': 7, '2005': 6, 'lindsay': 3, 'lohan': 5, 'boyscout': 1, 'big': 12, 'kid': 16, 'casey': 1, 'affleck': 4, 'predator': 1, 'malcolm': 4, 'reynolds': 9, 'smurfs': 1, 'ten': 103, 'sellers': 3, 'burger': 1, 'robot': 16, 'adult': 3, '1975': 5, 'female': 13, 'leads': 3, 'wrestler': 2, 'carrie': 4, 'alone': 6, 'wall': 61, 'street': 8, 'financial': 2, 'collapse': 1, 'joss': 2, 'whedon': 2, 'make': 51, 'another': 8, '90s': 34, 'concentration': 3, 'survivor': 1, 'buddha': 1, 'contains': 3, 'bigger': 4, 'boat': 5, 'clooneys': 2, 'oldest': 1, 'ossie': 1, 'davis': 18, 'nicholas': 9, 'sparks': 2, 'jolies': 2, 'changeling': 1, 'mighty': 3, 'true': 16, 'jackie': 10, 'chan': 12, 'chris': 21, 'tucker': 7, 'religion': 4, 'stalker': 1, 'eric': 10, 'idle': 2, 'tyler': 11, 'perry': 8, 'goldfinger': 3, 'zombie': 14, 'hills': 4, 'beverly': 2, 'cop': 9, 'word': 11, 'laser': 2, 'doubt': 1, 'hulk': 5, '70s': 4, '5': 10, 'cousin': 3, 'vinney': 1, 'evil': 23, 'twins': 4, '24': 3, 'dinosaur': 1, 'daze': 1, 'safe': 3, 'noir': 103, 'crime': 115, 'anatomy': 1, 'murder': 26, 'nighy': 1, 'moonrise': 1, 'kingdom': 2, 'plays': 31, '500': 1, 'days': 13, 'spies': 2, 'noire': 1, '1986': 1, 'mystery': 121, 'third': 10, 'chim': 1, 'chimeney': 1, 'bette': 13, 'midler': 6, 'charlies': 1, 'angeles': 5, 'nicole': 14, 'kidman': 10, 'ashton': 4, 'kutcher': 4, 'napoleon': 2, 'dynamite': 1, 'anything': 5, 'leprechaun': 2, 'positive': 1, 'header': 1, 
'directing': 9, 'alex': 7, 'haleys': 1, 'x': 11, 'academy': 12, 'polot': 1, 'tokyo': 1, 'france': 2, '60s': 4, 'classic': 18, 'stayin': 1, 'alive': 13, 'lethal': 2, 'weapon': 3, 'voiced': 4, 'donkey': 4, 'snowmen': 1, 'lastest': 2, 'gosling': 5, 'wash': 2, 'theaters': 10, 'currently': 5, 'travel': 3, 'isnt': 2, 'superheroes': 4, 'incredibles': 1, 'shakespearean': 1, 'branagh': 3, 'emma': 7, 'thompson': 13, 'andromeda': 1, 'strain': 1, 'josh': 8, 'duhamel': 2, 'fans': 2, 'christina': 3, 'applegate': 2, 'evening': 2, 'news': 2, 'dead': 39, '1980': 168, 'gone': 10, 'wind': 10, 'else': 4, 'blackhawk': 1, 'down': 8, 'happens': 4, 'he': 22, 'nightmare': 8, 'christmas': 17, 'hellow': 1, 'goodfellas': 10, 'celine': 1, 'dions': 1, 'really': 125, 'classical': 4, 'musicians': 6, 'boxers': 1, 'run': 38, 'super': 5, 'hero': 8, 'review': 26, 'alvin': 3, 'chipmunks': 3, 'die': 8, 'hitler': 4, 'post': 7, 'apocalyptic': 4, 'genesis': 1, 'rodriguez': 2, 'ferrell': 12, 'kirsten': 4, 'dunst': 3, 'hard': 10, 'also': 21, 'shoot': 2, 'em': 2, '21': 4, 'jump': 5, 'critically': 57, 'acclaimed': 56, 'divorce': 3, 'margaret': 1, 'thatcher': 3, 'babe': 3, 'ruth': 1, 'nutrition': 1, 'healthy': 1, 'eating': 4, 'red': 6, 'tails': 2, 'scenes': 5, 'viewers': 34, 'terry': 9, 'gilliam': 4, 'detachment': 1, 'flash': 1, '1967': 2, 'more': 26, '3': 22, 'haunted': 14, 'shark': 4, 'norman': 4, 'reedus': 1, 'mr': 17, 'mrs': 2, 'harrison': 22, 'ford': 52, 'shelby': 1, 'steel': 6, 'magnolias': 3, 'panther': 6, 'glenn': 5, 'close': 5, 'inspirational': 1, 'football': 5, 'principle': 2, 'shooting': 1, 'locations': 1, 'fistful': 1, 'dollars': 1, 'feature': 19, 'mafia': 4, 'goes': 4, 'audiences': 9, 'jane': 15, 'eyre': 2, 'carpenter': 3, 'urban': 4, 'cowboy': 14, 'pauly': 2, 'shore': 2, 'panda': 2, '1995': 10, 'puss': 2, 'n': 3, 'boots': 2, 'seven': 198, 'samurai': 5, 'amish': 1, 'beals': 1, 'dancer': 1, 'american': 23, 'reunion': 1, 'matrix': 15, 'hartnett': 1, '200s': 1, 'joseph': 12, 'gordon': 6, 'levitt': 3, 
'hammer': 2, 'nemo': 5, 'finding': 5, 'spider': 9, 'max': 1, 'candace': 1, 'bergen': 3, 'minnie': 1, 'driver': 8, 'trilogy': 4, 'mirrens': 1, 'biographies': 2, 'crazy': 4, 'horse': 9, 'liz': 3, 'wayans': 3, 'castle': 3, 'sky': 4, 'vow': 4, 'macgoohan': 2, 'weird': 4, 'motion': 3, 'guns': 3, 'witches': 6, 'destroy': 1, 'cowboys': 2, 'polticial': 1, 'oliver': 17, 'stone': 19, 'voices': 4, 'gingerbread': 1, 'legal': 2, '80s': 15, 'barry': 8, 'lyndon': 2, 'metro': 1, 'goldwyn': 1, 'mayer': 2, 'become': 2, 'studio': 3, 'pie': 4, 'sparrow': 5, '1930': 3, 'average': 219, 'stare': 2, 'goats': 1, 'zombies': 9, 'lorax': 2, 'doom': 5, 'booth': 2, 'am': 75, 'maya': 2, 'rudolph': 2, 'beowulf': 1, 'drive': 11, 'killer': 21, 'birds': 5, 'casino': 3, 'royale': 1, 'aaliyah': 1, 'soundtracks': 8, 'switchfoot': 1, 'kate': 30, 'beckinsale': 4, 'rush': 5, 'hour': 4, 'eilera': 1, 'ray': 4, 'liota': 2, 'wiz': 1, 'gillian': 3, 'teenage': 5, 'wasteland': 1, 'ash': 1, 'angry': 2, 'seeking': 9, 'justice': 4, 'kirstie': 2, 'alley': 2, 'travolta': 20, 'spanish': 3, 'pattinson': 3, 'spirited': 1, 'away': 4, 'video': 1, 'denzel': 13, 'washington': 13, 'truman': 1, 'blackstreets': 1, 'times': 3, 'hamm': 1, 'groundhog': 1, 'valor': 1, 'playing': 8, 'neo': 3, 'midlers': 1, 'sta': 1, 'dash': 2, 'theremin': 1, 'docudrama': 7, 'iraq': 1, 'telepathic': 1, 'relationship': 8, 'human': 3, 'meshell': 1, 'dialog': 8, 'espiranto': 1, 'under': 2, 'sea': 5, 'kenan': 1, 'fantasy': 207, 'europe': 1, 'diesal': 1, 'road': 9, 'trips': 1, 'licensed': 1, 'bridesmaids': 2, 'ball': 3, 'apart': 1, 'surfer': 7, 'lestat': 1, 'planet': 10, 'space': 30, 'jerry': 12, 'seinfeld': 2, 'boys': 8, 'mcg': 1, 'lithgow': 4, 'detective': 5, 'casa': 1, 'de': 27, 'mi': 1, 'padre': 1, 'runaway': 7, 'jury': 2, 'hood': 6, 'nurses': 2, 'baker': 1, 'special': 2, 'effects': 2, 'lees': 1, 'lois': 1, 'maxwells': 1, 'vantage': 2, 'point': 6, 'japan': 3, 'walken': 8, 'white': 17, 'graduate': 1, 'scanners': 1, 'dee': 6, 'werewolves': 3, 'fight': 
7, 'loving': 2, 'screenplay': 3, 'dark': 31, 'shadows': 2, 'walking': 1, 'tall': 1, 'heres': 1, 'penguin': 2, 'returns': 2, 'gia': 1, 'hunting': 2, 'elephants': 2, 'horrific': 1, 'thieves': 4, 'teachers': 1, 'facing': 1, 'adversity': 2, 'warwick': 2, 'heath': 8, 'ledger': 7, 'themed': 5, 'beast': 3, 'bob': 21, 'hoskins': 1, 'cooley': 1, 'riddler': 1, 'riding': 3, 'determinate': 1, 'book': 18, 'benjamin': 2, 'bratt': 1, 'campbell': 10, 'credited': 1, 'writer': 4, 'male': 4, 'abraham': 1, 'lincoln': 1, 'santa': 6, 'clause': 4, 'waht': 2, 'shaq': 2, 'shutter': 1, 'island': 7, 'nosferatu': 2, 'lion': 9, 'mona': 1, 'carvell': 1, 'maggie': 4, 'meet': 5, 'halfway': 1, 'politicians': 1, 'gus': 4, 'zant': 1, 'genres': 2, 'besides': 4, 'salt': 1, 'legolas': 1, 'franco': 4, 'theres': 3, 'no': 4, 'place': 20, 'lex': 2, 'boreanaz': 1, 'renee': 11, 'zellweger': 4, 'flicks': 8, 'forest': 3, 'fires': 1, 'said': 19, 'kings': 2, 'speech': 1, 'molly': 3, 'ringwold': 1, 'viewer': 7, '1972': 1, 'after': 12, 'skaters': 1, 'reese': 5, 'witherspoon': 4, 'large': 1, 'scale': 2, 'natural': 3, 'disasters': 1, 'casablanca': 3, 'snl': 1, 'madden': 1, 'sergio': 11, 'leone': 11, 'philip': 4, 'seymour': 8, 'hoffman': 13, 'transvestite': 1, 'mulholland': 1, 'justin': 8, 'timberlake': 6, 'jodie': 11, 'foster': 12, 'rio': 2, 'actresses': 4, 'miss': 5, 'moneypenny': 1, 'terminator': 9, 'bio': 4, 'pic': 3, 'e': 65, 'happened': 1, 'eleven': 3, 'cher': 3, 'fp': 1, 'greenaway': 1, 'aerosmith': 1, '1947': 1, 'dany': 1, 'boyles': 1, 'burton': 26, 'underworld': 6, 'streetcare': 1, 'desire': 4, 'stalag': 1, 'age': 10, 'corrupt': 1, 'policeman': 2, 'celebrity': 1, 'inc': 1, 'industry': 5, 'faye': 4, 'dunaway': 4, 'appears': 1, 'blues': 2, 'm': 12, 'quantum': 1, 'solace': 1, 'harrelson': 3, 'werewolf': 2, 'london': 12, 'during': 35, 'transformation': 1, 'scene': 8, 'channing': 3, 'tatum': 3, 'fred': 11, 'keaunu': 1, 'inner': 1, 'celebration': 1, 'richard': 27, 'gere': 6, 'amount': 2, 'sequels': 7, 'gena': 2, 
'district': 1, '9': 3, 'thousand': 2, 'words': 1, '1920s': 2, 'torch': 1, 'singer': 9, 'jim': 32, 'carrey': 22, 'devil': 5, 'inside': 2, 'bottle': 1, 'shock': 1, 'farley': 2, 'critical': 2, 'future': 19, 'whoopi': 3, 'goldberg': 3, 'ip': 1, 'cube': 3, 'shadow': 2, 'fourth': 1, '1945': 1, 'transformers': 5, 'william': 22, 'shatner': 4, 'strada': 2, 'acted': 16, 'psychiatrist': 1, 'enters': 2, 'vincent': 8, 'price': 4, 'boris': 1, 'karloff': 1, 'winslet': 8, 'thats': 23, 'train': 13, 'woman': 22, 'mgm': 3, 'kaye': 1, 'betty': 3, 'yellow': 2, 'labrador': 1, 'control': 3, 'selection': 1, 'premiere': 2, 'jj': 4, 'abrams': 4, 'worth': 3, 'watching': 4, 'claire': 2, 'god': 6, 'bernard': 5, 'herman': 7, 'mewes': 1, 'identity': 15, 'chevy': 8, 'golden': 4, 'pond': 2, 'knight': 9, 'rises': 3, 'beneath': 2, 'wings': 1, 'alan': 16, 'rickman': 4, 'silence': 9, 'lambs': 7, '1988': 5, 'cherry': 1, 'pepsie': 1, 'queen': 8, 'victoria': 4, 'practical': 1, 'magic': 8, 'chasing': 1, 'amy': 8, 'starrted': 1, 'harakiri': 1, 'crown': 1, 'affair': 5, 'prison': 7, '50s': 2, 'quinton': 1, 'tarrantino': 1, 'mia': 1, 'farrow': 1, 'gun': 5, 'elmer': 4, 'bernstein': 3, 'hogan': 1, 'rope': 1, 'speilberg': 2, 'produced': 21, 'han': 1, 'solo': 2, 'co': 10, 'hula': 1, 'they': 13, 'sandlot': 2, 'tina': 5, 'fey': 4, 'screenplays': 1, 'greed': 4, 'wallstreet': 1, '1989': 3, 'cabiria': 1, 'shawshank': 11, 'redemption': 16, 'ana': 1, 'karenina': 1, 'performed': 2, 'emelio': 1, 'estevez': 3, 'hockey': 5, 'mel': 40, 'gibson': 20, 'least': 25, 'box': 8, 'office': 7, 'upon': 4, 'bacon': 5, 'off': 11, 'novels': 2, 'inspector': 4, 'clouseau': 1, 'mars': 4, 'needs': 1, 'moms': 1, 'grossing': 12, 'prequel': 3, 'roots': 1, 'pandas': 1, 'type': 17, 'rosemarys': 1, 'renoylds': 1, 'critic': 4, 'ebert': 1, 'shortbus': 2, 'adams': 8, 'enchanted': 1, '2007': 1, 'replacements': 2, 'sigourney': 2, 'weaver': 4, 'franchise': 2, 'skellington': 1, '2006': 3, 'directs': 1, 'notebook': 2, 'tourist': 1, 'linkin': 2, 'park': 6, 
'leading': 6, 'actoractress': 1, 'wwii': 7, 'prince': 7, 'hell': 7, 'anymore': 2, 'mcgoohan': 1, 'diane': 3, 'pltf': 1, 'nemesis': 1, 'connery': 17, 'hope': 4, 'cuttlefish': 1, 'benny': 3, 'period': 8, 'pieces': 3, 'medieval': 2, 'superbad': 2, '1941': 2, 'rosebud': 3, 'blazing': 4, 'saddles': 5, 'supporting': 6, 'kermit': 2, 'muppets': 2, 'gael': 1, 'garcia': 4, 'bernal': 1, 'balloon': 1, 'giants': 1, 'francis': 18, 'hostel': 2, 'whose': 2, 'mouse': 4, 'staring': 32, 'j': 19, 'edgar': 4, 'hackman': 12, 'without': 6, 'gardening': 1, 'patterson': 1, 'handle': 2, 'truth': 3, 'humphrey': 13, 'bogart': 14, 'airplane': 11, 'austin': 9, 'powers': 10, 'feel': 4, 'buster': 4, 'higest': 1, 'know': 62, 'loves': 1, 'audry': 1, 'hepburn': 25, 'thr': 3, 'sixth': 2, 'zed': 1, 'lockhart': 1, 'pre': 1, '911': 3, 'trade': 1, 'center': 1, 'covered': 1, 'web': 1, 'waynes': 2, 'grit': 5, 'include': 4, 'incredible': 1, 'cry': 1, 'sister': 2, 'dillon': 4, 'drug': 4, 'use': 3, 'merican': 1, 'jessica': 10, 'alba': 3, 'marty': 4, 'mcfly': 2, '1977': 1, 'cast': 17, 'ivory': 3, 'everyone': 3, 'malcovich': 3, 'larenz': 2, 'tates': 1, 'chracter': 1, 'menace': 1, 'society': 1, 'pacimp': 1, 'houston': 6, 'luck': 7, 'lady': 5, 'comes': 8, 'bullit': 1, 'whay': 2, 'protocol': 1, 'droid': 1, 'tornadoes': 1, 'jeremy': 11, 'renner': 2, 'columbiana': 1, 'oscars': 8, 'extremely': 1, 'loud': 1, 'incredibly': 1, 'blow': 7, 'gaslight': 1, 'old': 12, '40': 1, 'ago': 5, 'league': 6, 'extraordinary': 6, 'gentlemen': 4, 'congeniality': 1, 'second': 6, 'carribean': 5, 'hudsons': 1, 'ranking': 1, 'd': 10, 'w': 1, 'griffith': 3, 'epic': 6, 'him': 4, 'offer': 3, 'refuse': 3, 'doc': 6, 'professor': 2, 'snapes': 1, 'executive': 1, 'producer': 2, 'numbers': 1, 'mohicans': 1, 'meg': 8, 'strange': 4, 'land': 3, 'eleased': 1, 'ingmar': 19, 'bergman': 23, 'abu': 1, 'calire': 1, 'danes': 2, 'blockbuster': 6, 'positively': 1, 'pitts': 2, 'barrymores': 2, 'monkees': 1, 'left': 4, 'wright': 3, 'western': 173, 'popularize': 
1, 'knights': 2, 'important': 4, 'figure': 2, 'history': 61, 'india': 2, 'fighting': 6, '20s': 1, 'spain': 1, 'battlestar': 1, 'galactica': 1, 'secret': 12, 'ooze': 1, 'motorcycle': 1, 'showed': 3, 'pactick': 1, 'drag': 3, 'dave': 7, 'chappelle': 2, 'weed': 1, 'youll': 2, 'lorre': 3, 'fallon': 1, 'serious': 2, 'hits': 7, 'wonderful': 5, 'thing': 5, 'tigger': 1, 'cartoon': 5, 'scored': 5, 'tupac': 1, 'janet': 2, 'tackles': 1, 'idea': 1, 'discrimination': 1, 'b': 7, 'goonies': 8, 'major': 9, 'serves': 1, 'rate': 15, 'kissing': 2, 'deseree': 1, 'cannibal': 1, 'holocaust': 4, 'pixars': 2, 'stanley': 23, 'techno': 2, 'gets': 6, 'week': 1, 'claus': 1, 'windtalkers': 1, 'humphry': 1, 'zemeckis': 9, 'chocolates': 2, 'computer': 7, 'graphics': 1, 'encounters': 1, '1981': 4, 'jonas': 1, 'fidelity': 1, 'lebowski': 1, 'casinos': 1, 'kyle': 5, 'charlize': 12, 'therons': 1, 'gangs': 3, 'clips': 7, 'winnie': 1, 'pooh': 1, 'tarantino': 12, 'arthur': 10, 'chocolat': 1, 'giant': 7, 'cock': 1, 'blocking': 1, 'jam': 7, 'composer': 3, 'amelie': 1, 'friends': 16, 'buzz': 12, 'lightyear': 12, 'charley': 1, 'chaplin': 18, 'women': 7, 'german': 3, 'burt': 7, 'goobers': 1, 'titles': 36, 'nation': 2, 'lampoon': 1, 'depicts': 1, 'earth': 13, 'being': 24, 'destroyed': 1, 'asteroid': 1, 'officers': 1, 'capitalism': 1, 'communism': 1, 'ww': 2, 'helped': 1, 'guote': 1, 'odyssey': 8, 'lean': 16, 'agent': 7, 'crackers': 1, 'soup': 2, 'change': 4, 'habit': 1, 'johhny': 2, 'alice': 3, 'cooper': 8, 'viewed': 1, 'frankie': 2, 'avalon': 2, 'beach': 7, 'scrooged': 1, 'extramarital': 1, 'affairs': 2, 'sound': 4, 'nothing': 4, 'tv': 6, 'prostitution': 3, 'bodyguard': 2, 'johanssons': 1, 'roles': 8, 'presleymovie': 1, 'las': 4, 'vegas': 5, 'charlotte': 1, 'hall': 5, 'wallis': 1, 'madison': 4, 'dance': 6, 'composed': 2, 'arabia': 2, 'ariel': 1, 'fonda': 17, 'tonight': 6, 'matthew': 31, 'broderick': 8, 'romances': 2, 'saving': 4, 'private': 6, 'ronald': 2, 'regan': 1, 'omar': 2, 'gooding': 3, 'ratings': 190, 
'above': 54, 'bays': 1, 'gaston': 1, 'nimoy': 5, 'gran': 1, 'torino': 1, 'once': 2, 'december': 2, 'biblical': 2, 'charleton': 1, 'heston': 10, 'colors': 1, 'feed': 1, 'search': 17, 'fireworks': 2, 'ricky': 5, 'gervais': 1, 'pair': 1, 'patriot': 1, 'pow': 2, 'camps': 2, 'americans': 2, 'spacey': 8, 'half': 8, 'baked': 5, 'et': 7, 'cat': 7, 'hit': 9, 'mission': 7, 'impossible': 6, 'gantry': 1, 'inception': 72, 'much': 7, 'money': 9, 'earn': 1, 'sow': 1, 'rainbow': 8, 'blacksploitation': 3, 'voodoo': 1, 'saw': 7, 'jingle': 1, 'way': 3, 'singlteon': 1, 'parody': 8, 'frankenweenie': 1, 'raja': 1, 'court': 1, 'vinny': 2, 'charlton': 10, 'hestons': 1, 'final': 4, 'died': 1, 'fancy': 1, 'gown': 1, 'blitz': 1, 'japanese': 4, 'mushrooms': 1, 'mcgregor': 2, 'six': 171, 'goodbar': 1, 'skyscraper': 1, 'dad': 2, 'ahead': 4, 'marmalade': 1, 'synopsis': 2, 'comic': 6, 'africa': 5, 'soundrack': 1, 'ferris': 4, 'buelers': 4, 'mivie': 1, 'antagonist': 1, 'invisible': 2, 'cate': 5, 'blanchett': 5, 'les': 3, 'starts': 6, 'titans': 1, 'romero': 1, 'happening': 1, 'reading': 1, 'theron': 11, 'moranis': 2, 'balls': 4, 'read': 1, 'eastbound': 1, 'threme': 1, 'futureovies': 1, 'march': 2, 'roddy': 5, 'mcdowel': 4, 'isaaks': 1, 'grisham': 1, 'spencer': 13, 'tracy': 16, '1965': 2, 'creature': 4, 'gollum': 1, 'metaphor': 1, 'environmental': 1, 'causes': 1, 'commander': 2, 'data': 1, 'folk': 1, 'bands': 2, 'extra': 4, 'terrestrial': 4, 'previews': 2, 'sandlers': 1, 'zoolander': 1, 'heavyweights': 1, 'pan': 2, 'episode': 3, 'paul': 42, 'newman': 15, 'jazz': 2, 'viwers': 1, 'why': 4, 'married': 4, 'spy': 8, 'obsessed': 1, 'sings': 4, 'blowers': 1, 'daughter': 5, 'field': 5, 'sharon': 3, 'meat': 2, 'gross': 7, 'ultimatum': 2, 'cranky': 1, 'teething': 1, 'wich': 1, 'forever': 1, 'salsa': 1, 'weekend': 3, 'bernies': 1, 'pop': 2, 'culture': 1, 'rutger': 1, 'hauer': 1, 'darrel': 2, 'hanna': 3, 'brooks': 21, 'o': 2, 'brother': 2, 'thou': 1, 'sub': 1, 'titled': 24, 'samuel': 12, 'l': 16, 'frost': 2, 
'witness': 1, 'precognitive': 1, 'along': 2, 'wha': 1, 'karate': 1, 'pipi': 2, 'longstocking': 2, 'mean': 3, 'girls': 7, 'blade': 3, 'runner': 2, 'flaming': 1, 'computers': 2, 'sant': 1, 'val': 6, 'kilmer': 6, 'romeros': 1, 'direcorial': 1, 'debut': 2, 'person': 5, 'few': 2, 'disneypixar': 1, '2009': 8, 'realesed': 1, 'katy': 3, 'liquid': 1, 'terminatior': 1, 'ai': 1, 'artificial': 3, 'intelligence': 6, 'rebel': 6, 'against': 1, 'golf': 3, '1985': 4, 'watched': 3, 'benefits': 4, 'razzie': 2, 'depps': 2, 'piece': 4, 'frankly': 2, 'dear': 2, 'dont': 9, 'damn': 3, 'acedemy': 1, 'nominees': 1, 'often': 2, 'shown': 5, 'wa': 1, 'legend': 5, 'katherine': 7, '2pac': 1, 'shakur': 1, 'whar': 1, 'doing': 1, 'appearance': 4, 'commando': 1, 'valkyrie': 1, 'meyers': 2, 'fincher': 14, '2008': 7, 'rotten': 2, 'tomatoes': 2, 'ant': 1, 'bully': 3, 'yul': 2, 'brenner': 2, 'death': 29, 'forrester': 2, 'boorman': 4, 'unforgiven': 2, 'biker': 2, 'marlboro': 1, 'rod': 3, 'serling': 1, 'leslie': 5, 'gore': 7, 'sing': 5, 'sunshine': 1, 'lolipops': 1, 'rainbows': 1, '1958': 1, 're': 2, 'written': 2, 'wiig': 1, 'fletcher': 1, 'conspiracy': 1, 'theory': 1, 'rhett': 1, 'butler': 4, 'boyle': 5, 'twist': 3, 'shout': 1, 'version': 8, 'popeye': 2, 'schindlers': 1, 'colin': 6, 'higgins': 1, 'dolly': 2, 'parton': 2, 'hal': 1, 'plant': 1, 'tirantino': 1, 'supernatural': 4, 'hakuna': 1, 'matata': 1, 'destination': 2, 'olivia': 4, 'newton': 5, 'titantic': 1, 'players': 2, 'eisenhower': 1, 'power': 12, 'outer': 3, 'limits': 2, 'episodes': 1, 'included': 5, 'kidmans': 2, 'neilson': 1, 'godfellas': 1, 'howards': 1, 'production': 4, 'partner': 1, 'micky': 1, 'rooney': 1, 'own': 2, 'arc': 2, 'daniel': 22, 'jordan': 4, 'p': 4, 'tv13': 1, 'darby': 1, 'shaw': 1, 'annette': 5, 'blanket': 1, 'bingo': 1, 'sabrina': 3, 'blood': 11, 'honey': 2, 'psycho': 2, 'babysitter': 3, 'wife': 7, 'south': 7, 'african': 5, 'cage': 11, 'hollywood': 4, 'redford': 9, 'mcqueen': 13, 'poor': 1, 'west': 7, 'side': 8, 'bieber': 1, 
'native': 2, 'acorns': 1, 'trainer': 1, 'working': 5, 'girl': 23, '1939': 1, 'cancer': 3, 'washingtons': 1, 'trouble': 1, 'considering': 2, 'frightening': 1, 'golfing': 1, 'tubthumping': 1, 'chased': 1, 'bear': 5, 'wood': 10, 'sheriff': 1, 'brody': 2, 'nolan': 15, 'rival': 1, 'magician': 1, 'broke': 3, 'records': 2, '1000': 2, 'corpes': 1, 'doris': 2, 'sharp': 1, 'shooter': 1, 'aragorn': 1, 'queens': 1, 'stared': 6, 'kiddman': 1, 'alices': 1, 'restaurant': 1, 'flying': 3, 'repeated': 1, 'scarface': 4, 'gangstas': 1, 'paradise': 1, 'wit': 1, 'ustinov': 3, 'pat': 1, 'benetar': 1, 'q': 1, 'rap': 1, 'mike': 12, 'tyson': 4, 'tiger': 3, 'gallo': 5, 'elton': 1, 'chocolate': 7, 'factory': 9, 'gilmore': 1, 'shakespeare': 4, 'denmark': 1, 'lamp': 1, 'nc17': 1, 'wname': 1, 'collaborates': 1, 'iv': 2, 'gods': 1, 'framed': 2, 'rabbit': 3, 'met': 4, 'sally': 2, 'crystals': 1, 'young': 11, 'frankenstein': 6, 'mental': 5, 'institution': 3, 'pacifist': 1, 'historic': 3, 'raiders': 5, 'ark': 3, 'statham': 1, 'hired': 3, 'balboa': 1, 'actual': 1, 'cult': 3, 'classics': 2, 'cloris': 1, 'leachman': 1, 'oif': 1, 'segel': 2, 'growing': 4, 'seth': 8, 'rogens': 1, 'pineapple': 1, 'express': 1, 'banned': 1, 'ship': 6, 'wwi': 1, 'middle': 4, 'east': 2, 'kelly': 15, 'debbie': 3, 'mall': 1, 'butterfinger': 1, 'nautical': 3, 'huston': 12, 'heavy': 3, 'metal': 3, 'tribute': 3, 'achilles': 1, 'endless': 1, '1940': 127, '1970': 162, 'ellen': 2, 'roark': 1, 'bars': 1, 'oldman': 6, 'rachel': 3, 'mcadams': 2, 'her': 9, 'allie': 1, 'hamilton': 8, 'britanny': 1, 'thin': 2, 'hasta': 1, 'vista': 1, 'start': 3, 'today': 3, 'prostitute': 3, 'godzilla': 3, 'pool': 1, 'monty': 5, 'pythons': 1, 'circus': 3, 'korea': 1, 'luther': 2, 'voight': 1, 'geres': 1, 'spoken': 1, 'orson': 22, 'welles': 22, 'sidney': 1, 'poitier': 1, 'fox': 13, 'v': 2, 'ringwald': 2, 'owning': 1, 'ponies': 1, 'record': 1, 'label': 1, 'rim': 1, 'english': 2, 'speaking': 1, 'salma': 2, 'hayek': 2, 'gleason': 2, 'bale': 15, 'troopers': 1, 
'racing': 6, 'col': 1, 'oneill': 2, 'stargate': 1, 'continuum': 1, 'forgetting': 2, 'sarah': 8, 'britan': 1, 'myers': 3, 'moldy': 1, 'peaches': 1, 'parents': 1, 'blind': 5, 'dracula': 5, 'apolo': 1, 'multiplicity': 1, 'dealing': 11, 'disease': 1, 'roll': 1, 'scout': 1, 'diamonds': 1, 'cats': 3, 'showcase': 1, 'acting': 6, 'talent': 1, 'pierce': 7, 'brosnan': 2, 'bedazzled': 1, 'watson': 6, 'circle': 1, 'perrys': 1, 'screen': 1, 'isaac': 3, 'biopic': 2, 'blading': 1, 'moves': 4, 'kong': 2, 'gorrilla': 1, 'h': 5, 'macy': 3, 'earned': 4, 'firth': 3, 'katniss': 2, 'success': 2, 'independent': 31, 'neil': 1, 'harris': 4, 'travelling': 2, 'astronaut': 1, 'peace': 1, 'driving': 2, 'daisy': 1, 'hercules': 2, 'york': 10, 'undertaker': 2, 'buried': 3, 'similiar': 1, 'youve': 1, 'mail': 1, 'air': 5, 'force': 2, 'pulp': 1, 'twelve': 1, 'mainstream': 2, 'sick': 2, 'killers': 5, 'saying': 8, 'shot': 7, 'stardom': 1, 'containing': 3, 'koreas': 1, 'musketeers': 2, 'theplot': 1, 'while': 1, 'sleeping': 1, 'lassie': 1, 'lucille': 2, 'provide': 15, 'cleese': 5, 'somg': 1, 'notting': 2, 'farmer': 1, 'corn': 1, 'boy': 11, 'saves': 2, 'gabor': 2, 'piper': 1, 'arent': 2, 'violent': 7, 'liberace': 1, 'titular': 1, 'track': 3, 'focus': 7, 'concept': 2, 'seeing': 4, 'ground': 1, 'leaves': 2, 'then': 3, 'animation': 70, 'braveheart': 4, 'stein': 1, 'wish': 5, 'dogfight': 1, 'dragon': 3, 'tatoo': 1, 'mob': 8, 'live': 7, 'banjo': 1, 'belong': 1, 'airport': 1, 'cloning': 1, 'morgan': 12, 'freeman': 11, 'mila': 2, 'kunis': 2, 'johnson': 7, 'dicaprios': 2, 'pregnant': 1, 'teenager': 2, '007': 1, 'moviies': 1, 'perfrmance': 1, 'stuart': 7, 'apes': 2, 'mary': 7, 'reilly': 1, 'mustangs': 1, 'portmans': 1, 'robbie': 1, 'skywalker': 1, 'waterfront': 1, 'sometime': 2, '1994': 3, 'simba': 1, 'potte': 1, 'mrovies': 1, 'gonna': 2, 'fly': 4, 'zimmer': 2, 'dinasour': 1, 'fron': 1, 'cushing': 2, 'eddie': 9, 'murphys': 2, 'paris': 3, 'right': 48, 'caddyshack': 2, 'acts': 1, 'sword': 2, 'sorcery': 2, 
'madonna': 4, 'went': 3, 'undercover': 5, 'beauty': 10, 'pageant': 1, 'emmy': 2, 'rossum': 2, 'bulma': 1, 'navy': 3, 'pilot': 2, 'warden': 1, 'lola': 1, 'swahili': 1, 'gangster': 22, 'wonka': 8, 'choclate': 1, 'witch': 14, 'wizard': 7, 'oz': 4, 'jean': 11, 'dujardin': 1, 'translation': 1, 'rihanna': 1, 'current': 1, 'ends': 2, 'ester': 1, 'seabiscuit': 1, 'possible': 3, 'fun': 4, 'foxs': 1, 'berrys': 1, 'hungry': 1, 'again': 3, 'originate': 1, 'christo': 1, 'son': 3, 'kills': 2, 'mother': 5, 'leigh': 21, 'anne': 5, 'tuohy': 1, 'extreme': 5, 'violence': 17, 'work': 5, 'hughes': 3, '1984': 2, 'cg': 1, 'presleyovie': 1, 'kubricks': 1, 'johannsen': 1, 'gloria': 4, 'grahame': 2, '1960': 169, 'dolphins': 2, 'frog': 2, 'shirley': 8, 'earliest': 1, 'johnathan': 1, 'tayler': 1, '28': 2, 'wonderland': 3, 'denver': 2, 'don': 9, 'knotts': 3, 'rollerball': 1, 'pony': 1, 'entirely': 2, 'furry': 2, 'monster': 8, 'worked': 5, 'green': 7, 'eyed': 2, 'gang': 5, 'warriors': 2, 'avildsen': 1, 'karat': 1, 'night': 29, 'voldemort': 1, 'bythe': 2, 'danner': 2, '1979': 4, 'forrest': 2, 'gump': 1, 'maids': 1, 'greek': 1, 'mythology': 2, '1950': 162, 'guys': 2, 'escaped': 1, 'chain': 2, 'supermans': 1, 'bakeds': 1, 'if': 17, 'so': 7, 'ones': 1, 'redd': 2, 'babysitters': 2, 'issaiah': 1, 'kurt': 5, 'russell': 15, 'ritchies': 1, 'pets': 2, 'glen': 2, 'russia': 1, 'socery': 1, 'bugs': 2, 'regarded': 1, '1943': 1, 'spaghetti': 41, 'galahad': 1, 'videodrome': 1, 'pictures': 4, 'bollywood': 3, 'prominent': 3, 'panama': 1, 'halen': 1, 'airheads': 1, 'serial': 4, 'hughs': 1, 'mannequin': 1, 'experiments': 5, 'barber': 1, 'writtin': 1, 'rogan': 1, 'ringo': 1, 'starr': 1, 'prometheus': 2, 'aside': 2, 'student': 1, 'winfield': 1, 'sottish': 1, 'deep': 4, 'lauren': 5, 'bacall': 3, 'shogun': 1, 'sherlock': 4, 'holmes': 7, 'joins': 1, 'carnival': 1, 'sam': 14, 'worthington': 1, 'penguins': 1, 'ted': 5, 'lawyer': 1, 'hidden': 2, 'psychic': 3, 'abducts': 1, 'child': 25, 'rko': 1, 'ginger': 4, 'rogers': 6, 
'ralph': 9, 'latifah': 3, 'italy': 3, 'veteran': 1, 'dinosaurs': 5, 'empire': 2, 'craig': 6, 'kindergarten': 1, 'diver': 2, 'saga': 2, 'eclipse': 2, 'doug': 3, 'glatt': 1, 'schooner': 1, 'sailboat': 1, 'unexpected': 1, 'journey': 8, '14th': 1, 'tony': 9, 'randal': 2, 'jetsons': 1, 'jet': 5, 'li': 4, 'opposite': 5, 'tights': 1, 'goldie': 4, 'hawn': 6, 'surfing': 1, 'laura': 6, 'linney': 1, 'kicking': 1, 'screaming': 2, 'hot': 3, 'gorgo': 1, '300': 4, 'rodney': 6, 'dangerfield': 3, '1968': 1, 'solved': 1, 'thier': 1, 'issues': 5, 'season': 3, 'polynesian': 1, 'bettany': 1, 'charles': 22, 'darwin': 1, 'gogh': 1, 'silberling': 1, '1974': 1, 'sid': 1, 'krofft': 1, 'lmovie': 1, 'mason': 2, 'boxer': 4, 'bamed': 1, 'flynn': 3, 'geoffrey': 2, 'marquis': 1, 'sade': 1, 'punk': 1, 'eddy': 6, 'kathy': 4, 'bates': 3, 'youth': 2, 'farce': 3, 'alli': 1, 'magraw': 1, 'addiction': 2, 'claude': 3, 'damme': 2, 'almighty': 1, 'location': 1, 'cyd': 1, 'charisse': 1, 'danced': 1, 'deborah': 1, 'foreman': 1, 'bulge': 1, 'fresno': 1, 'california': 1, 'webb': 2, 'october': 2, 'diazs': 2, 'civil': 9, 'supremacy': 1, 'freida': 1, 'mock': 1, 'jericho': 2, 'wanna': 1, 'cannonball': 1, 'thelma': 1, 'louise': 1, 'opera': 6, 'bernie': 3, 'mac': 3, 'bath': 1, 'tub': 2, 'nims': 1, 'smiths': 2, 'con': 3, 'becomes': 1, 'commodities': 1, 'trader': 1, 'parsons': 1, 'plots': 5, 'doctor': 4, 'nyc': 1, 'kill': 10, 'members': 2, 'katie': 2, 'mistaken': 2, 'scorcese': 1, 'blanc': 2, 'kennedy': 2, 'marooned': 2, 'committed': 1, 'tommy': 10, 'lights': 3, 'call': 3, '1900s': 1, 'stood': 1, 'still': 3, 'treasure': 5, 'pioneers': 2, 'fisher': 2, 'soldiers': 7, 'jolson': 1, 'littlest': 1, 'workers': 1, 'lily': 1, 'tomlin': 1, 'gambling': 1, 'marvin': 2, 'throws': 1, 'boiling': 1, 'coffee': 1, 'grahames': 1, 'face': 2, 'soul': 3, 'gandalf': 2, 'niro': 4, 'verbinski': 3, 'viggo': 5, 'mortensen': 5, 'fasten': 1, 'seatbelts': 1, 'ides': 1, 'crawford': 5, 'governor': 1, 'louisiana': 1, 'costners': 1, 'contacts': 1, 
'trailerfor': 1, 'ewoks': 1, 'hays': 1, 'escape': 5, 'turkish': 1, 'kis': 1, 'meryly': 1, 'lawyers': 2, 'each': 1, 'pageants': 1, 'merchant': 1, 'antony': 2, 'zelweger': 1, 'lindsey': 2, 'feels': 1, 'gangsta': 1, 'cagney': 12, 'quaids': 1, 'keaches': 1, 'carradines': 1, 'jesse': 5, 'racial': 2, 'differences': 1, 'soon': 9, 'penelope': 2, 'cruzs': 1, 'language': 2, 'filmography': 1, 'akira': 8, 'kurusawas': 1, 'fools': 1, 'magicians': 1, '571': 1, 'mexican': 3, 'powered': 2, 'individual': 2, 'million': 3, 'dollar': 2, 'corpse': 1, 'kubrik': 1, 'barbara': 8, 'stanwyck': 4, 'mcmurray': 1, 'snider': 1, 'strangland': 1, 'belushi': 5, 'cameos': 1, 'freddie': 1, 'sonny': 1, 'steele': 2, 'citizen': 3, 'kane': 3, 'chinese': 2, 'crenna': 1, 'salesman': 2, 'swayzes': 1, 'apocalypse': 4, 'olaf': 1, 'asian': 1, 'soccer': 5, 'pryor': 6, 'anti': 1, 'erasing': 2, 'memory': 7, 'catherine': 8, 'zeta': 5, 'andersons': 1, 'window': 3, 'monkeys': 2, 'richie': 2, 'rich': 2, 'return': 4, 'jedi': 2, 'categories': 8, 'tarot': 1, 'card': 1, 'reader': 1, 'apocalpse': 1, 'swordsman': 1, 'party': 10, 'werner': 2, 'herzog': 2, 'threatened': 1, 'dimples': 1, 'provided': 1, 'himself': 1, 'kruger': 1, 'snape': 1, 'francos': 1, 'dumb': 1, 'dumber': 1, 'lightning': 2, 'found': 6, 'believe': 2, '1944': 1, 'born': 2, 'connick': 1, 'jr': 22, 'doll': 4, 'chuckie': 1, 'artist': 1, 'colonel': 1, 'fosse': 1, 'dowop': 1, 'veiwers': 1, 'ozarks': 1, 'emil': 1, 'renny': 1, 'overboard': 1, 'winter': 4, '68th': 1, 'rose': 4, 'korean': 1, 'punks': 1, 'hugh': 8, 'project': 3, 'better': 7, 'hangover': 2, 'upcoming': 5, 'lizard': 2, 'elephant': 1, 'kline': 3, 'helena': 5, 'bonham': 4, 'carter': 11, 'ancient': 1, 'indians': 4, 'mexico': 5, 'latin': 1, 'revolutions': 2, 'goldgerg': 1, 'hitchcocks': 1, 'french': 6, 'captain': 6, 'picard': 1, 'cheerleaders': 2, 'dan': 12, 'akyrod': 1, 'marlon': 12, 'brando': 8, 'we': 5, 'consider': 2, 'relied': 1, 'kindness': 1, 'strangers': 1, 'herbie': 2, 'bananas': 1, 'duvall': 9, 
'country': 3, 'wrestling': 3, 'lightening': 1, 'rainmaker': 1, 'bongo': 1, 'mark': 22, 'wahlberg': 2, 'mccall': 1, 'different': 2, 'voorhees': 1, 'ex': 1, 'nba': 1, 'martins': 2, 'madam': 1, 'cullen': 4, 'moss': 1, 'dalai': 1, 'lama': 1, 'sloth': 1, 'brooke': 2, 'shields': 2, 'orleans': 1, 'prostitutes': 1, 'ladies': 2, 'oprah': 1, 'winfrey': 1, 'henson': 2, 'recommend': 30, 'australian': 1, 'pearce': 1, 'bears': 2, 'strips': 1, 'nude': 2, 'sjywalker': 1, 'lemmon': 14, 'avanti': 1, 'bag': 2, 'lions': 2, 'audrey': 9, 'nun': 1, 'belgian': 1, 'congo': 1, 'complete': 2, 'told': 2, 'kidnapped': 3, 'caine': 10, 'ireland': 1, 'atleast': 1, 'dreaming': 1, 'living': 7, 'kenny': 2, 'loggins': 1, 'akyroyd': 1, 'englends': 1, 'elm': 1, 'st': 2, 'college': 5, 'spiral': 2, 'staircase': 1, 'dorothy': 2, 'mcguire': 1, 'amnesia': 1, 'sufferer': 1, 'babylon': 2, 'adwhat': 1, 'tattooed': 1, 'diesels': 1, 'hand': 1, 'inceptioncategories': 1, 'england': 2, 'those': 1, 'brasher': 1, 'doubloon': 1, 'merlin': 2, 'kim': 5, 'basinger': 2, 'speilburg': 1, 'fortress': 2, 'ginnifer': 1, 'goodwin': 1, 'purple': 9, 'rain': 12, 'joyful': 2, 'holden': 2, 'joan': 12, 'bennett': 1, 'angela': 4, 'lansbury': 1, 'alaska': 2, 'gwyenth': 1, 'paltrow': 2, 'radio': 2, 'lounge': 1, 'annie': 1, 'racer': 1, 'whisperer': 1, 'louis': 3, 'malle': 2, 'lana': 1, 'european': 1, 'thornton': 2, 'alyson': 3, 'kidnapping': 1, 'fist': 2, 'nesbitt': 1, 'leave': 1, 'law': 2, 'tamilyn': 1, 'tomita': 1, 'heralded': 1, 'adapted': 1, 'wild': 12, 'burgess': 1, 'rabbits': 2, 'glow': 1, 'condom': 1, 'short': 49, 'megan': 3, 'actionsci': 2, 'infamous': 2, 'thorton': 2, 'ferrel': 2, 'c': 9, 'riely': 1, 'fat': 1, 'liotta': 2, 'amp': 4, 'hayak': 2, 'thunder': 1, 'superheros': 1, 'between': 6, 'battlefield': 3, 'help': 23, 'fellow': 1, 'tragedy': 4, 'sinking': 1, 'calamity': 1, 'boyer': 1, 'tries': 1, 'convince': 1, 'ingrid': 4, 'wyatt': 2, 'earp': 2, 'holliday': 2, 'fraser': 2, 'navarone': 2, 'whole': 6, 'rodrick': 1, 'estelle': 1, 
'getty': 1, 'newly': 1, 'murderer': 3, 'united': 2, 'states': 1, 'fluffy': 1, 'deanna': 1, 'durbin': 1, 'francois': 7, 'truffaut': 7, 'scientist': 2, 'klingons': 1, 'fights': 2, 'oddjob': 1, 'seen': 19, 'stapler': 1, 'case': 3, 'mondays': 1, 'snakes': 3, 'welcome': 2, 'carmen': 4, 'someone': 18, 'dears': 1, 'neve': 1, 'trey': 1, 'sleepless': 1, 'seattle': 1, 'holds': 1, 'takes': 8, 'hostages': 1, 'bud': 4, 'receiver': 1, 'lupus': 1, 'skin': 2, 'revolutionary': 2, 'flight': 1, 'navigator': 1, 'bane': 1, 'cruses': 1, 'sisters': 1, 'lina': 1, 'wertmuller': 1, 'candice': 2, 'pig': 2, 'demons': 6, 'flesh': 5, 'fishes': 1, 'dori': 1, 'deneuve': 2, 'penny': 2, 'sees': 3, 'fountain': 1, 'suceessful': 1, 'unlaterally': 1, 'deliverance': 1, 'holly': 1, 'hunter': 9, 'kidnap': 1, 'uma': 2, 'thurman': 2, 'hayes': 2, 'regina': 2, 'speedway': 2, 'malcom': 3, 'radcliffe': 1, 'dances': 2, 'snake': 1, 'hudson': 5, 'race': 4, 'cash': 1, 'gwyneth': 1, 'receives': 1, 'former': 1, 'beetlejuice': 5, 'stop': 1, 'believin': 1, 'deneuvre': 1, 'mom': 2, 'wjat': 1, 'isla': 1, 'lang': 9, 'fishers': 1, 'soap': 1, 'elisabeth': 4, 'shue': 4, 'mclaine': 1, 'marriage': 5, 'jude': 1, 'kelley': 1, 'submarines': 1, 'buddy': 3, 'zanzibar': 1, 'snatch': 2, 'gennie': 1, 'pigs': 1, 'sequelsprequels': 1, 'marathon': 1, 'deal': 2, 'childhood': 2, 'memories': 1, 'jungle': 5, 'soldier': 2, 'vietnam': 3, 'montana': 1, 'pacinos': 1, 'newer': 1, 'backward': 1, 'aging': 1, 'father': 5, 'emily': 1, 'blunt': 1, 'wears': 1, 'prada': 1, 'gotta': 1, 'friend': 7, 'received': 211, 'scotts': 1, 'elliott': 3, 'gould': 2, 'teller': 1, 'raging': 2, 'bull': 2, 'shyamalan': 2, 'order': 6, 'gerald': 1, 'step': 1, 'roller': 2, 'derby': 1, 'aids': 2, 'hiv': 1, 'rosario': 1, 'dawson': 3, 'pearl': 2, 'bailey': 1, '1987': 1, 'chess': 1, 'match': 1, 'joy': 2, 'schwarzeneggers': 1, 'katharine': 10, 'cape': 1, 'fear': 11, 'iron': 2, 'catchphrase': 1, 'enjoy': 2, 'later': 4, 'elijah': 3, 'turns': 1, 'into': 7, 'chariots': 3, 'fire': 6, 
'christie': 2, 'michelle': 6, 'trilogies': 2, 'angie': 1, 'dickinson': 1, 'speak': 1, 'inmvented': 1, 'depression': 3, 'listall': 1, 'ducks': 2, 'ledgers': 1, 'paranorman': 1, 'jenny': 3, 'mccarthy': 3, 'swank': 1, 'methods': 1, 'game': 3, 'python': 2, 'holy': 5, 'grail': 3, 'kiss': 10, 'ruin': 1, 'heigel': 1, 'clark': 4, 'gable': 3, 'reporter': 1, 'claudette': 1, 'colbert': 1, 'heiress': 1, 'jiminy': 1, 'cricket': 1, 'lancaster': 3, 'espianoge': 1, 'husband': 3, 'michigan': 1, 'hereafter': 1, 'orwells': 1, 'liam': 11, 'neeson': 10, 'slap': 1, 'mimi': 2, 'duchovney': 1, 'lynch': 14, 'vivien': 14, 'raiser': 2, 'cosby': 4, 'roadhouse': 1, 'ride': 2, 'surf': 1, 'jonze': 1, 'beyond': 3, 'unfortunate': 1, 'events': 1, 'crab': 2, 'grodin': 1, 'whom': 1, 'nigel': 1, 'suspicion': 1, 'highschool': 1, 'dennis': 4, 'hopper': 2, 'canadian': 1, 'leo': 1, 'gregory': 14, 'brian': 23, 'broadway': 4, 'shows': 1, 'dwayne': 1, 'nazis': 2, 'cornfield': 1, 'owl': 1, 'rocket': 1, 'waiting': 1, 'tables': 1, 'june': 2, 'laughton': 1, 'tyrone': 2, 'trial': 2, 'panic': 2, 'room': 3, 'past': 430, 'turtle': 1, 'stripes': 1, 'perlman': 2, 'assasins': 1, 'nerds': 1, 'ken': 3, 'pioneered': 1, 'cgi': 3, 'techniques': 1, 'secretariat': 1, 'cure': 1, 'ballet': 1, 'costner': 5, 'dystopian': 1, 'bomb': 2, 'longest': 1, 'yard': 1, 'decaprio': 1, 'controlling': 1, 'franki': 1, 'lane': 3, 'pet': 2, 'versions': 1, 'carol': 3, 'miracle': 1, 'juno': 1, 'ghostbusters': 2, 'angels': 5, 'duel': 1, 'burns': 3, 'danson': 1, 'sammy': 1, 'rhythm': 1, 'lifes': 1, 'worker': 1, 'coppolas': 1, 'pendleton': 1, 'heaven': 4, 'butterfly': 2, 'effect': 1, 'ws': 1, 'noyce': 1, 'moneyball': 1, 'nudity': 1, 'andy': 8, 'perfect': 3, 'storm': 2, 'bite': 1, 'racehorse': 1, 'bruckheimer': 1, 'houses': 1, 'avatars': 1, 'differ': 1, 'hasselhoff': 1, 'mutiny': 2, 'footloose': 2, 'coaches': 1, 'championship': 1, 'cross': 1, 'dressing': 1, 'masked': 1, 'minute': 1, 'lucy': 3, 'liu': 1, 'linda': 5, 'ronstadt': 1, 'here': 4, 'reynold': 
1, 'plimpton': 1, 'alda': 3, 'collette': 1, 'greg': 8, 'kinnear': 1, 'contest': 1, 'scorseses': 2, 'arkin': 1, 'terrorizes': 1, 'gigi': 1, 'scottish': 1, 'schoolteacher': 1, 'veronica': 2, 'lake': 5, 'nicolas': 6, 'smashing': 1, 'pumpkins': 1, 'jake': 2, 'gyllanhall': 1, 'airlines': 1, 'plane': 4, 'brue': 1, 'walter': 5, 'mathau': 1, '3000': 1, 'kaiser': 1, 'soze': 1, 'marijuana': 1, 'ghandi': 1, 'animates': 1, 'rights': 3, 'crossdressing': 2, 'diggstown': 1, 'scanner': 1, 'darkly': 1, 'sir': 1, 'eye': 2, 'trolley': 1, 'loaf': 1, 'ovie': 1, 'baskervilles': 1, 'gutenburg': 1, 'eve': 5, 'rib': 1, 'pokemon': 1, 'brians': 1, 'caan': 1, 'omedy': 1, 'toby': 3, 'keith': 8, 'pup': 1, 'leader': 1, 'm2m': 1, 'adaptation': 2, 'jk': 1, 'rowling': 1, 'number': 2, 'rule': 1, 'aviator': 1, 'publisher': 1, 'jameson': 1, 'bird': 5, 'crimson': 1, 'tide': 2, 'caddy': 1, 'crippled': 1, 'hoodwinked': 1, 'utopia': 3, 'spiders': 1, 'biopics': 1, 'rudd': 1, 'cuba': 2, 'stealing': 1, 'befriending': 1, 'truck': 1, 'dean': 7, 'willem': 1, 'dafoe': 1, 'chopper': 1, 'frodo': 2, 'bunny': 2, 'descendants': 1, 'tin': 2, 'cup': 1, 'barbra': 1, 'streisand': 1, 'rogen': 1, 'banks': 1, 'bobby': 1, 'cruz': 3, 'era': 2, 'alcoholism': 2, 'everybody': 2, 'wants': 2, 'thanksgiving': 1, 'cooking': 1, 'debra': 8, 'messing': 5, 'mmpa': 1, 'basketnall': 1, 'diaries': 3, 'sequal': 1, 'wildcats': 1, 'hello': 1, 'nielson': 3, 'drum': 1, 'wheedon': 1, 'guess': 1, 'dinner': 1, 'kept': 1, 'fromthe': 1, 'dawn': 4, 'kiefer': 3, 'versus': 4, 'martians': 1, 'contain': 2, 'cruises': 2, 'fantacy': 1, 'cruse': 1, 'apocolypse': 1, 'lambada': 1, 'spinal': 1, 'tap': 1, 'kardashian': 1, 'contribute': 1, 'hop': 1, 'springtime': 1, 'taxi': 5, 'narnia': 1, 'adaption': 1, 'weithorn': 1, 'valet': 1, 'sells': 1, 'secrets': 5, 'eat': 1, 'worlds': 2, 'hookey': 1, 'phantom': 1, 'criticism': 1, 'robbins': 3, 'prisoner': 5, 'gelenhal': 1, 'actually': 2, 'changed': 1, 'bringing': 2, 'huge': 1, 'worms': 1, 'combine': 1, 'rookie': 1, 
'liv': 2, 'took': 3, 'faces': 3, 'earnest': 1, 'borgnine': 1, 'landau': 1, 'older': 1, 'bela': 3, 'lugosi': 3, 'jodi': 1, 'forster': 1, 'running': 1, 'brave': 1, 'shower': 1, 'wanderin': 1, 'brown': 4, 'sugar': 1, 'tinman': 1, 'teresa': 1, 'opening': 2, 'bennie': 1, 'joone': 1, 'listen': 1, 'actin': 1, 'seagal': 2, 'mcdowall': 2, 'gay': 1, 'themes': 1, 'subcultures': 1, 'keenan': 1, 'bought': 1, 'wachowski': 2, 'fame': 1, 'bowie': 2, 'childs': 1, 'dakota': 1, 'fanning': 1, 'lots': 1, 'singers': 2, 'f': 4, 'gray': 4, 'chameleon': 1, 'shreck': 1, 'christine': 5, 'captian': 1, 'locked': 1, 'basement': 1, 'dana': 2, 'jfks': 1, 'funeral': 4, 'quazi': 1, 'motto': 1, 'loser': 1, 'photographer': 1, 'zealand': 1, 'mcclain': 2, 'fair': 2, 'weather': 2, 'dailey': 1, 'jaime': 2, 'clarkson': 1, 'beginning': 2, 'brokeback': 1, 'mountain': 3, 'because': 1, 'seasons': 2, 'gossip': 1, 'poehler': 1, 'rates': 1, 'hubbard': 1, 'accused': 1, 'low': 2, 'brick': 2, 'leavitts': 1, 'knowing': 1, 'whistle': 1, 'full': 5, 'yoda': 6, 'anger': 1, 'hate': 1, 'sting': 3, 'mirror': 2, 'bus': 1, 'amber': 4, 'waves': 1, 'cleef': 1, 'fully': 1, 'loaded': 1, 'fir': 1, 'reeve': 1, 'mcclure': 1, 'breath': 1, 'airplanes': 1, 'victor': 3, 'mature': 1, 'nick': 5, 'norah': 1, 'town': 8, 'station': 2, 'zebra': 1, 'crush': 1, 'farrah': 1, 'faucett': 1, 'according': 1, 'coach': 3, 'ozzy': 1, 'osbourn': 1, 'rag': 2, 'square': 1, 'wide': 1, 'shut': 1, 'gi': 1, 'prodigy': 3, 'pianist': 1, 'atlanta': 1, 'soft': 1, 'core': 1, 'porn': 1, 'hotel': 2, 'hurricane': 1, 'forced': 1, 'hide': 1, 'credits': 1, '60': 1, 'seconds': 1, 'za': 2, 'spent': 1, 'long': 3, 'centers': 36, 'zellweiger': 2, '1933': 1, 'naomi': 1, 'watts': 1, 'adrien': 1, 'hes': 3, 'tramp': 1, 'rango': 1, 'sleepy': 1, 'hollow': 1, 'curse': 4, 'having': 6, 'perkins': 3, 'sundance': 1, 'our': 3, 'macaulay': 1, 'culkin': 4, 'audi': 1, 'carel': 1, 'locally': 1, 'theater': 2, 'break': 1, 'departed': 2, 'cocoon': 1, 'group': 3, 'including': 3, 'thor': 2, 
'warren': 6, 'beatty': 1, 'rosalind': 1, 'nurse': 3, 'helps': 1, 'polio': 1, 'patients': 1, 'ashore': 1, 'steiger': 1, 'attacking': 1, 'zardoz': 1, '20000': 1, 'leagues': 1, 'stooges': 5, 'courtroom': 1, 'florence': 1, 'machine': 3, 'sountracks': 1, 'zone': 2, 'kristin': 1, 'novak': 1, 'electra': 3, 'shamolan': 1, 'mockingbird': 1, 'peck': 11, 'radar': 1, 'oreilley': 1, 'kathryn': 1, 'bigelow': 2, '82nd': 1, 'll': 1, 'cool': 2, 'hilton': 1, 'victim': 1, 'sissy': 2, 'spacek': 2, 'unnatural': 1, 'brandon': 7, 'belle': 1, 'smart': 1, 'tootsie': 1, 'general': 5, 'furby': 1, 'benji': 1, 'altman': 3, 'fashion': 1, 'scarefest': 1, 'sweden': 1, 'chester': 1, 'teacher': 1, 'teaser': 1, 'prometheis': 1, 'blofeld': 1, 'enough': 5, 'attack': 3, 'ricci': 1, 'winona': 2, 'ryder': 2, 'chers': 1, 'daughters': 1, 'album': 1, 'angelica': 2, 'benning': 1, 'artists': 2, 'aladdin': 1, 'tattoo': 1, 'patricia': 3, 'arquette': 8, 'raspberry': 1, 'corman': 1, 'surreal': 1, 'ending': 2, 'sense': 1, 'futuristic': 1, 'everything': 1, 'frozen': 2, 'total': 1, 'degaul': 1, 'let': 5, 'hoosiers': 1, 'octopus': 1, 'devon': 4, 'sawa': 2, 'orlando': 1, 'bloom': 1, 'berenger': 2, 'russian': 6, 'defector': 1, 'maclachlan': 1, 'desert': 2, 'fritz': 10, 'fahrenheit': 1, 'itunes': 1, 'transgender': 1, 'allyson': 1, 'chart': 1, 'topping': 1, 'landis': 1, 'pitch': 1, 'closer': 1, 'clive': 1, 'suggest': 9, 'cort': 1, 'thinks': 1, 'whalberg': 1, 'miley': 1, 'cyrus': 2, 'erich': 1, 'von': 3, 'zipper': 1, 'martial': 1, 'arts': 1, 'chuck': 1, 'norris': 1, 'marilyn': 5, 'monroe': 6, 'farrell': 1, 'temple': 1, 'elite': 1, 'controversial': 1, 'bruno': 2, 'suspenseful': 1, 'tate': 4, 'reno': 2, 'ratinggenre': 1, 'ricki': 1, 'peper': 1, 'slater': 3, 'pheiffer': 1, 'catharine': 1, 'athletes': 1, 'dreyfuss': 1, 'ebsen': 1, 'january': 1, 'showboat': 1, 'venice': 1, 'assasin': 1, 'mathematics': 1, 'bean': 3, 'stage': 1, 'door': 4, 'bullitt': 1, 'whoopie': 1, 'waters': 2, 'modine': 5, 'ocean': 2, 'hands': 1, 'scissors': 
1, 'nascar': 1, 'schneider': 5, 's': 752, 'heigl': 1, 'excellent': 57, 'held': 3, 'captive': 1, 'denise': 1, 'richards': 2, 'brent': 2, 'carver': 1, 'roy': 1, 'scheider': 1, 'lena': 3, 'horne': 1, 'juliana': 2, 'margulies': 2, 'biographical': 28, 'wildlife': 4, 'domineering': 2, 'riches': 3, 'glickenhaus': 1, 'decent': 49, 'spade': 2, 'liked': 134, 'lot': 49, 'miguel': 2, 'arteta': 1, 'fate': 2, 'tia': 4, 'mowry': 2, 'garber': 1, 'safehouse': 1, 'shemp': 2, 'demon': 2, 'body': 6, 'laughable': 3, 'nine': 172, 'decades': 335, 'jonathon': 3, 'silverman': 2, 'cadillac': 2, 'dysfunctional': 2, 'renfro': 4, 'sibling': 6, 'rivalry': 6, 'very': 104, 'harts': 1, 'rebellion': 2, 'bei': 1, 'po': 2, 'clippety': 1, 'clobbered': 1, 'cotto': 1, 'pacquiao': 1, '7': 2, '04': 1, 'chad': 4, 'fees': 4, 'abortion': 2, 'janeane': 1, 'garofolo': 1, 'petrarca': 1, 'portrait': 11, 'within': 91, 'eight': 172, 'watchable': 39, 'finnes': 2, 'fake': 2, 'research': 2, 'decade': 106, 'bridget': 4, 'wagner': 3, 'sport': 36, 'hack': 1, 'unrated': 149, 'experimentation': 1, 'wiebe': 1, 'obach': 2, 'mcneil': 3, 'haunting': 3, 'portraying': 1, 'combat': 3, 'matthews': 1, 'counter': 3, 'terrorism': 1, 'hewitt': 1, 'anchors': 1, 'aweigh': 1, 'ti': 2, 'involved': 19, 'entertaining': 2, 'schiller': 1, 'birthday': 1, 'imaginary': 2, 'interesting': 5, 'samantha': 2, 'becke': 2, 'dianne': 1, 'vassey': 2, 'ok': 70, 'using': 1, 'nathan': 2, 'frankowski': 1, 'ethan': 2, 'embry': 1, 'rakoff': 2, 'abandonment': 2, 'lessin': 1, 'struggle': 5, 'donovan': 1, 'tamera': 1, 'prowse': 2, 'fairies': 2, 'asher': 2, 'brough': 2, 'laurent': 1, 'bouzereau': 1, 'carchietta': 1, 'goldy': 3, 'ira': 2, 'sachs': 1, 'passfield': 1, 'shannen': 2, 'doherty': 2, 'thumbs': 52, 'near': 7, 'experiences': 2, 'capra': 18, 'avant': 30, 'garde': 30, 'mockumentaries': 3, 'theatre': 2, 'audience': 3, 'joachim': 1, 'schroeder': 1, 'tanks': 3, 'rejection': 3, 'younglove': 1, 'occult': 4, 'callum': 3, 'rennie': 3, 'gracen': 3, 'finance': 4, 
'discovery': 5, 'missing': 5, 'rent': 3, 'satire': 2, 'melodrama': 23, 'religious': 2, 'cults': 1, 'multiple': 1, 'murders': 4, 'emotional': 14, 'smallwood': 2, 'levitation': 9, 'sheree': 1, 'mediocre': 57, 'bridges': 2, 'novocaine': 1, 'kohnen': 1, 'sorcerer': 3, 'killing': 1, 'bin': 1, 'laden': 1, 'burnette': 1, 'dutcher': 1, 'estes': 5, 'jorma': 1, 'taccone': 1, 'centered': 24, 'tokar': 1, 'available': 18, 'thought': 8, 'cindy': 2, 'lau': 2, 'development': 2, 'vincente': 1, 'minnelli': 3, 'gunfight': 5, 'appignanesi': 1, 'excited': 1, 'stratosphere': 1, '01': 1, 'horner': 1, 'bergin': 3, 'malkovich': 2, 'kopp': 1, 'bring': 1, 'listing': 2, 'farewell': 2, 'callan': 3, 'mulvey': 3, 'pelican': 1, 'hobbs': 1, 'vacation': 3, 'wands': 2, 'wilderness': 7, 'anyone': 4, 'gendreau': 3, 'generally': 2, 'insanity': 3, 'quattrochi': 1, 'timothy': 2, 'dalton': 1, 'lay': 1, 'cia': 4, 'cain': 2, 'sounds': 1, 'sundays': 1, 'tiffanys': 1, 'polanski': 14, 'recall': 2, 'whether': 3, 'derek': 4, 'code': 3, 'tragic': 1, 'form': 7, 'eugene': 1, 'might': 17, 'revolves': 2, 'warrior': 3, 'vulgarity': 2, 'vivica': 2, 'elves': 4, 'given': 25, 'repos': 2, 'paget': 1, 'erased': 3, 'examples': 2, 'manipulation': 2, 'lines': 2, 'megaton': 1, 'earlier': 1, 'delpy': 3, 'adler': 2, 'masterson': 4, 'larue': 1, 'speedman': 3, 'mahiro': 1, 'maeda': 1, 'dating': 2, 'jaeckel': 1, 'kuntz': 1, 'cobb': 3, 'devils': 2, 'moment': 1, 'repossessed': 1, 'officer': 1, 'homeless': 1, 'palmer': 1, 'sorvino': 10, 'tommi': 1, 'lepola': 1, 'juan': 2, 'delancer': 1, 'chaos': 3, 'direction': 2, 'carolina': 1, 'moon': 6, 'able': 9, 'picardo': 4, 'damiano': 1, 'damiani': 1, 'smuggling': 4, 'hyde': 2, 'sidoni': 2, 'describe': 5, 'terror': 6, 'tournament': 1, 'audition': 9, 'kurosawa': 7, 'autry': 5, 'pakula': 1, 'chapple': 2, 'marine': 1, 'averaged': 6, 'andrei': 5, 'tarkovsky': 5, 'andrew': 10, 'cymek': 1, 'jacobs': 1, 'leman': 1, 'bening': 2, 'haywood': 1, 'antoine': 3, 'fuqua': 1, 'marks': 1, 'genius': 1, 'gags': 4, 
'benenson': 1, 'peyton': 1, 'contact': 2, 'palma': 9, 'smrz': 2, 'mysterious': 2, 'talkington': 2, 'carlos': 4, 'leon': 5, 'wedge': 1, 'falling': 2, 'category': 2, 'palko': 1, 'mosbacher': 1, 'deidre': 1, 'denis': 1, 'leary': 1, 'jacobi': 3, 'dermot': 3, 'mulroney': 3, 'dolph': 1, 'lundgren': 1, 'duncan': 3, 'mass': 3, 'eli': 1, 'roth': 2, 'federico': 14, 'fellini': 13, 'downey': 10, 'darabont': 10, 'astaire': 4, 'savage': 2, 'homicide': 1, 'hayao': 12, 'miyazaki': 12, 'narrate': 1, 'experience': 5, 'guillermo': 14, 'del': 15, 'toro': 14, 'shumlin': 1, 'opinion': 3, 'weeks': 1, 'couffer': 1, 'government': 4, 'assassin': 3, 'gold': 12, 'garner': 1, 'horan': 1, 'michelangelo': 11, 'antonioni': 11, 'patric': 1, 'lando': 1, 'slowsky': 1, 'wells': 1, 'whitesell': 1, 'lidstrom': 1, 'kenn': 1, 'navarro': 1, 'kirt': 2, 'gunn': 3, 'mario': 2, 'azzopardi': 1, 'investigations': 2, 'marlee': 2, 'matlin': 2, 'suitable': 2, 'rosen': 1, 'melanie': 2, 'biehn': 4, 'caretaker': 1, 'business': 2, 'marnos': 1, 'ziller': 1, 'painter': 4, 'mcgennis': 1, 'phil': 4, 'hartman': 3, 'siegel': 1, 'mcgowan': 2, 'phillipe': 4, 'neill': 4, 'bernhard': 3, 'sara': 1, 'sugarman': 1, 'lately': 9, 'wincer': 1, 'fugitive': 2, 'dorff': 1, 'simmons': 3, 'terrence': 10, 'malick': 9, 'entertain': 1, 'vittoria': 5, 'sica': 5, 'kramer': 1, 'marcy': 1, 'walker': 2, 'open': 4, 'bonnell': 1, 'loane': 1, 'griffiths': 1, 'laugh': 6, 'rodger': 2, 'los': 3, 'piano': 1, 'agree': 1, 'unrequited': 2, 'eden': 2, 'harvey': 3, 'concerning': 1, 'tara': 1, 'judelle': 1, 'behind': 1, 'enemy': 2, 'remote': 1, 'viewings': 1, 'pee': 2, 'wee': 2, 'hironobu': 1, 'sakaguchi': 1, 'dina': 4, 'meyer': 4, 'averaging': 1, 'grauman': 1, 'explorer': 1, 'jonathan': 8, 'glazer': 2, 'agents': 3, 'lili': 5, 'carry': 9, 'megacorporation': 3, 'happen': 8, 'memoirs': 1, 'wags': 1, 'selfishness': 1, 'across': 2, 'davidson': 4, 'assassination': 5, 'yancovic': 2, 'poltergeist': 1, 'prom': 1, 'sonja': 1, 'truckers': 2, 'giraldi': 1, 
'underground': 3, 'resistance': 3, 'kris': 2, 'sherwood': 1, 'andre': 1, 'braugher': 1, 'rags': 1, 'meugniot': 1, 'politics': 2, 'nic': 2, 'izzi': 2, 'fischa': 1, 'skeet': 1, 'ulrich': 1, 'albert': 2, 'finney': 2, 'heidi': 1, 'hurt': 2, 'recommendations': 2, 'vaughan': 2, 'vengeance': 4, 'rapaport': 1, 'giancarlo': 1, 'esposito': 1, 'eaten': 6, 'intrusion': 3, 'odonell': 2, 'loretta': 1, 'alper': 1, 'elliot': 1, 'lebovitz': 1, 'doyle': 3, 'emilio': 2, 'blindness': 2, 'notorious': 1, 'scarecrow': 2, 'singles': 1, 'th': 4, 'voyage': 2, 'sinbad': 1, 'trotsky': 1, 'chosen': 2, 'dantes': 2, 'inferno': 2, 'hooker': 1, 'trunk': 1, 't': 10, 'nell': 2, 'shopgirl': 2, 'spotswood': 1, 'hustle': 1, 'toe': 2, 'raymond': 3, 'massey': 2, 'goal': 1, 'prayers': 1, 'staininger': 1, 'kenneth': 3, 'degeneres': 1, 'kunert': 2, 'salisbury': 1, 'cohn': 1, 'randolph': 2, 'mantooth': 2, 'bloody': 2, 'imposter': 2, 'heartache': 1, 'whaling': 1, 'erskine': 1, 'buscemi': 2, 'lisa': 9, 'prinze': 3, 'mistress': 1, 'clements': 1, 'bauchau': 1, 'ninja': 3, 'hopewell': 1, 'sleepwalking': 1, 'brooklyn': 4, 'gabbert': 2, 'shepard': 1, 'gabriel': 5, 'byrne': 2, 'false': 1, 'accusation': 1, 'dougray': 3, 'sight': 10, 'pumpkin': 1, 'karver': 1, 'chamitoff': 1, 'exist': 1, 'akroyd': 3, 'schizophrenia': 2, 'forget': 1, 'dolans': 1, 'eriq': 1, 'lesalle': 1, 'usually': 2, 'excuse': 1, 'connolly': 3, 'inventory': 1, 'famke': 2, 'janssen': 2, 'dealt': 4, 'deportation': 1, 'master': 3, 'kathleen': 2, 'information': 31, 'easy': 3, 'junichi': 1, 'fujisaku': 1, 'malkovic': 1, 'uth': 1, 'satterfield': 1, 'warlock': 2, 'guerrilla': 3, 'warfare': 3, 'atencio': 1, 'spirit': 3, 'dreyfus': 2, 'afghanistan': 1, 'brain': 2, 'smasher': 1, 'duck': 1, 'le': 1, 'samoura': 1, 'shaft': 1, 'static': 1, 'vanessa': 6, 'angel': 3, 'amadeus': 2, 'delirious': 1, 'insanitarium': 1, 'stand': 1, 'bulletproof': 1, 'warm': 1, 'slow': 1, 'moving': 1, 'gregg': 1, 'champion': 1, 'lapica': 1, 'cecil': 1, 'demented': 1, 'selleck': 2, 'reba': 
1, 'mcentire': 1, 'moretti': 1, 'rancher': 4, 'amos': 2, 'kollek': 2, 'saulnier': 1, 'helicopter': 2, 'raid': 3, 'hiroyuki': 2, 'kitakubo': 2, 'guiness': 4, 'colm': 3, 'meaney': 2, 'joaquin': 3, 'phoenix': 3, 'theodore': 2, 'witcher': 2, 'pankov': 4, 'mitzi': 1, 'kapture': 1, 'balaban': 2, 'starvation': 1, 'takahiro': 1, 'tanaka': 1, 'mellodrama': 1, 'andrea': 3, 'destruction': 3, 'jaclyn': 1, 'norbit': 1, 'peanuts': 1, 'state': 1, 'property': 1, 'forbidden': 5, 'ocallaghan': 1, 'drake': 3, 'details': 2, 'maniac': 2, 'phoebe': 1, 'eng': 7, 'subs': 7, 'guessing': 1, 'stonerville': 1, 'getting': 4, 'stoned': 1, 'harmony': 1, 'korine': 1, 'elfont': 1, 'kwok': 2, 'alexander': 3, 'rapp': 2, 'britt': 1, 'allcroft': 1, 'cheryl': 1, 'ladd': 1, 'chrispopher': 1, 'corey': 4, 'feldman': 4, 'deforest': 2, 'erik': 2, 'macarthur': 1, 'erika': 3, 'eleniak': 3, 'small': 4, 'hayley': 2, 'mills': 2, 'jackman': 4, 'ian': 7, 'mckellen': 3, 'gedrick': 2, 'kiristine': 2, 'bravery': 5, 'kristy': 1, 'swanson': 1, 'majors': 3, 'lynn': 4, 'shelton': 4, 'hoge': 2, 'melissa': 3, 'gilbert': 2, 'feifer': 1, 'pare': 1, 'phillip': 5, 'recovering': 1, 'alcoholic': 1, 'avary': 1, 'raimi': 3, 'gellar': 2, 'teri': 2, 'polo': 2, 'heroine': 3, 'jokes': 6, 'pearson': 1, 'quality': 1, 'glover': 2, 'anybody': 1, 'yet': 5, 'sobieski': 1, 'parole': 1, 'hearing': 1, 'talisa': 1, 'soto': 1, 'cavalline': 1, 'charisma': 1, 'dacascos': 3, 'criminals': 4, 'breaking': 2, 'rescue': 5, 'nuclear': 2, 'coen': 8, 'abandoned': 4, 'infotainment': 1, 'subject': 2, 'religulous': 1, 'wallace': 1, 'heard': 6, 'meatballs': 2, 'vibes': 1, 'mm': 1, 'upwards': 1, 'swamp': 1, 'flags': 1, 'fathers': 1, 'pete': 1, 'travis': 2, 'costas': 1, 'imprisonment': 4, 'san': 3, 'giacomo': 2, 'vincie': 1, 'heartless': 1, 'barkin': 1, 'karen': 5, 'ridings': 1, 'hurtz': 1, 'due': 3, 'double': 5, 'indemnity': 1, 'documentarys': 3, 'langella': 2, 'gotten': 1, 'revolved': 6, 'campaign': 1, 'may': 10, 'revolver': 1, 'clouds': 1, 'collars': 1, 
'burlesque': 1, 'compared': 1, 'shes': 4, 'duty': 1, 'perceived': 1, 'maximum': 1, 'risk': 1, 'hows': 1, 'saech': 1, 'kinear': 1, 'miranda': 1, 'nancy': 4, 'grahn': 2, 'murakami': 1, 'joel': 4, 'lloyd': 6, 'greta': 2, 'scacchi': 1, 'sedative': 2, 'kellie': 2, 'prisoners': 2, 'mine': 1, 'greenwood': 3, 'allan': 1, 'goldstein': 1, 'sands': 1, 'bosses': 1, 'edwards': 6, 'jensen': 2, 'ackles': 2, 'lovers': 2, 'raquel': 1, 'welch': 1, 'making': 1, 'dagenham': 1, 'requesting': 1, 'pam': 2, 'grier': 2, 'seem': 1, 'fishburne': 3, 'outlanders': 1, 'suggestions': 1, 'alienation': 2, 'basket': 1, 'info': 6, 'wicksboro': 1, 'incident': 1, 'moody': 1, 'bummer': 1, 'cavalry': 2, 'kuo': 1, 'ren': 1, 'wu': 2, 'public': 3, 'meins': 1, 'august': 2, 'undergrounds': 2, 'mordum': 1, 'armitage': 2, 'lange': 3, 'loneliness': 2, 'cusak': 2, 'shane': 5, 'hammond': 2, 'niall': 1, 'maccormick': 1, 'georgina': 3, 'riedel': 2, 'dimitry': 1, 'elyashkevich': 1, 'irwin': 1, 'mattson': 1, 'marcil': 2, 'addis': 1, 'wimpenny': 1, 'triumphant': 1, 'fugue': 1, 'key': 2, 'village': 1, 'mccabe': 1, 'miller': 3, 'northern': 1, 'loose': 1, 'spirits': 3, '2081': 1, 'fantastic': 1, 'thirteenth': 1, 'floor': 2, 'brittany': 2, 'snatchers': 1, 'joey': 3, 'mutilation': 2, 'driven': 1, 'masahiko': 1, 'maesawa': 1, 'definitely': 2, 'porretta': 3, 'rie': 1, 'rasmussen': 1, 'wolotzky': 2, 'franklin': 1, 'guerrero': 1, 'avery': 2, 'asner': 1, 'wonderment': 1, 'rowland': 3, 'involves': 7, 'confession': 2, 'forgiven': 1, 'lea': 8, 'salonga': 1, 'phillips': 5, 'galactic': 1, 'kendall': 1, 'prairie': 1, 'companion': 1, 'nature': 3, 'wicked': 1, 'vince': 3, 'vieluf': 1, 'leick': 2, 'masciantonio': 1, 'berkley': 1, 'larry': 6, 'peerce': 1, 'lyde': 1, 'dementia': 1, 'tribe': 1, 'levin': 1, 'kenya': 4, 'montgomery': 5, 'clift': 4, 'ann': 4, 'unconventional': 4, 'appreciate': 1, 'theresa': 4, 'randall': 3, 'gangsters': 2, 'beth': 1, 'evans': 4, 'mcdowell': 3, 'donna': 2, 'derrico': 2, 'karl': 1, 'hirsch': 2, 'sherman': 1, 
'curly': 2, 'miner': 5, 'warsaw': 1, 'ghetto': 1, 'dudikoff': 2, 'whiteman': 1, 'mandy': 1, 'patinkin': 1, 'foxes': 1, 'omalley': 1, 'trejo': 1, 'morse': 2, 'dunn': 4, 'malevolence': 1, 'dwells': 1, 'wing': 1, 'chaser': 1, 'supposedly': 1, 'unicorn': 1, 'asking': 2, 'inquiring': 1, 'peterson': 3, 'interested': 3, 'immortality': 1, 'vaughn': 3, 'parallel': 2, 'universes': 1, 'balloons': 3, 'rommel': 1, 'lustig': 2, 'imagination': 2, 'pritts': 1, 'danielle': 2, 'fishel': 2, 'mulgrew': 1, 'catholicism': 1, 'osamu': 1, 'dezaki': 1, 'quaid': 1, 'ubaldo': 1, 'ragona': 1, 'basannavar': 1, 'mackenzie': 2, 'alicia': 2, 'silverstone': 2, 'rel': 1, 'filming': 1, 'style': 2, 'passion': 1, 'career': 1, 'mcclellan': 1, 'garett': 2, 'maggart': 2, 'suffering': 3, 'buzzell': 1, 'notarile': 1, 'setting': 2, 'survival': 4, 'mathew': 3, 'lillard': 3, 'julianna': 2, 'lavin': 2, 'specific': 2, 'treachery': 1, 'skelding': 2, 'wise': 4, 'vino': 1, 'salame': 1, 'joshua': 2, 'carroll': 4, 'oconner': 2, 'priestley': 2, 'ziering': 3, 'friendship': 2, 'alvarez': 1, 'peggy': 1, 'whaley': 3, 'durante': 1, 'mastrantonio': 1, 'moira': 1, 'lander': 1, 'marra': 1, 'casper': 1, 'dien': 1, 'naim': 1, 'sematary': 1, 'galaxy': 2, 'kelber': 1, 'warthog': 2, 'kazuo': 1, 'terada': 1, 'sure': 1, 'wen': 1, 'jiang': 1, 'searching': 12, 'buchanan': 1, 'akhurst': 1, 'bounty': 6, 'hunters': 1, 'mann': 2, 'xavier': 2, 'puslowski': 2, 'dome': 1, 'dogville': 1, 'robinson': 2, 'crusoe': 1, 'sharpes': 1, 'stabbing': 2, 'infantolino': 1, 'colman': 1, 'kalin': 2, 'paradox': 1, 'pilgrim': 1, 'englishman': 1, 'brandis': 1, 'disturbed': 1, 'zacharias': 1, 'proyas': 1, 'survivors': 1, 'paolo': 3, 'montalban': 3, 'wondering': 1, 'karaoke': 2, 'isitt': 1, 'entitled': 6, 'electric': 1, 'mist': 1, 'replace': 1, 'clinton': 3, 'choose': 1, 'abbess': 1, 'wizards': 2, 'moran': 1, 'forbes': 1, 'cook': 2, 'yancy': 1, '1800': 1, 'bologna': 1, 'schwimmer': 3, 'summit': 1, 'dustin': 5, 'diamond': 2, 'devitos': 1, 'downtown': 2, 
'adrian': 2, 'vitoria': 1, 'steiman': 1, 'pena': 1, 'barbie': 2, 'fabrice': 2, 'du': 2, 'welz': 1, 'schrader': 1, 'leonetti': 1, 'stevan': 1, 'mena': 1, 'deals': 4, 'infantry': 1, 'vohrer': 1, 'deception': 1, 'zaphiratos': 1, 'karbelnikoff': 1, 'brett': 2, 'ricardo': 2, 'montalbon': 2, 'beghe': 3, 'gavin': 1, 'budd': 1, 'emmett': 2, 'alston': 2, 'collins': 2, 'lynda': 2, 'jail': 4, 'betrayal': 2, 'ernest': 1, 'shari': 1, 'lies': 2, 'marciano': 1, 'jada': 3, 'pinkett': 3, 'sollett': 1, 'nazi': 7, 'occupation': 1, 'activism': 4, 'isabella': 3, 'rosselini': 3, 'bryan': 1, 'sivertson': 1, 'hindman': 2, 'spacecrafts': 2, 'marcus': 3, 'raboy': 1, 'memorial': 1, 'ransom': 3, 'kearsley': 1, 'steyermark': 1, 'yasmine': 2, 'bleeth': 2, 'bayne': 1, '18': 2, 'century': 2, 'wilkins': 1, 'caruso': 1, 'tatnya': 1, 'ali': 1, 'walters': 2, 'optimism': 1, 'anarchy': 2, 'brickman': 1, 'chong': 3, 'paddy': 1, 'breathnach': 1, 'parasites': 4, 'awarded': 1, 'tori': 1, 'spelling': 1, 'cory': 1, 'brenden': 1, 'sexton': 1, 'pantiliano': 1, 'stephenson': 1, 'lara': 3, 'fabian': 1, 'joven': 1, 'tan': 1, 'cynthia': 1, 'occupied': 2, 'poland': 2, 'lederman': 1, 'diana': 2, 'riggs': 1, 'illness': 1, 'zane': 2, 'planes': 1, 'water': 2, 'cert': 1, 'mayor': 1, 'these': 2, 'drop': 1, 'gorgeous': 1, 'earthlings': 1, 'edwin': 1, 'mccain': 1, 'grandmas': 1, 'jealousy': 5, 'hoodrats': 1, 'hoodrat': 1, 'slated': 1, 'platoon': 5, 'sorority': 3, 'row': 2, 'nicholsons': 1, 'anywhere': 2, 'rosalie': 1, 'bunch': 2, 'fastest': 1, 'indian': 1, 'obvious': 1, 'marsters': 2, 'morey': 2, 'darjeeling': 1, 'limited': 1, 'necropolis': 1, 'mckenzie': 2, 'astin': 3, 'rydell': 1, 'michell': 2, 'entertainment': 3, 'thir': 1, 'en': 1, 'adults': 1, 'rip': 1, 'circuit': 1, 'remaking': 1, 'gathering': 1, 'eagles': 1, 'check': 1, 'altitude': 1, 'donnell': 1, 'rawlings': 1, 'ashy': 1, 'classy': 1, 'toons': 1, 'hells': 1, 'kitchen': 1, 'television': 1, 'ring': 2, 'shaun': 1, 'sheep': 1, 'leap': 1, 'lambkind': 1, 'siren': 1, 
'slaughtered': 1, 'vomit': 1, 'dolls': 2, 'museum': 1, 'even': 1, 'alden': 1, 'redgrave': 1, 'tracie': 4, 'lords': 4, 'somehow': 2, 'ginsburg': 2, 'hooks': 2, 'widen': 1, 'mccaulay': 3, 'tales': 1, 'execution': 6, 'jordanna': 3, 'brewster': 3, 'foley': 1, 'dylan': 2, 'neal': 2, 'flintstones': 1, 'honor': 4, 'mayfield': 1, 'gina': 2, 'gershon': 2, 'heather': 2, 'graham': 1, 'ruzickova': 1, 'bonamy': 1, 'jaye': 2, 'erick': 1, 'dowdle': 1, 'taking': 2, 'stevenson': 2, 'morio': 1, 'asaka': 1, 'invisibility': 3, 'sebastian': 2, 'panneck': 2, 'amitri': 1, 'espejo': 1, 'hartle': 1, 'ned': 1, 'farr': 1, 'market': 3, 'gluck': 1, 'daniela': 2, 'pestova': 2, 'jag': 1, 'mundhra': 1, 'nikki': 2, 'cox': 3, 'vonda': 3, 'shepherd': 4, 'rosson': 1, 'scoggins': 3, 'soderbergh': 6, 'sage': 1, 'date': 3, 'duchovny': 2, 'illegal': 1, 'activity': 1, 'drenner': 1, 'patient': 1, 'disguised': 1, 'mira': 7, 'furlan': 1, 'quod': 2, 'drugs': 2, 'sexuality': 4, 'dandridge': 1, 'contend': 1, 'revolving': 7, 'stan': 4, 'kirsch': 4, 'beltran': 1, 'sticks': 1, 'starrcade': 1, 'ultimate': 1, 'mirrormask': 1, 'preston': 1, 'cruelty': 1, 'fine': 2, 'decision': 2, 'gunpoint': 1, 'focuses': 2, 'torture': 6, 'detectives': 3, 'explores': 1, 'focusing': 2, 'neel': 1, 'tetsuro': 1, 'amino': 1, 'teens': 3, 'societal': 2, 'girly': 1, 'hardship': 1, 'everlasting': 3, 'base': 2, 'bandits': 3, 'hilarious': 1, 'couple': 1, 'twisters': 1, 'snipers': 3, 'neretva': 1, 'shack': 1, 'joneses': 1, '976': 1, 'bangkok': 1, 'dr': 10, 'caparulo': 1, 'cap': 1, 'malibu': 1, 'motocrossed': 1, 'paper': 1, 'woodstock': 1, 'tenten': 1, 'diemens': 1, 'cannabis': 1, 'cured': 1, 'irving': 3, 'lerner': 1, 'revenge': 5, 'tx': 1, 'ado': 2, 'kalangis': 1, 'church': 1, 'gorney': 3, 'baron': 2, 'munchausen': 1, 'sunset': 2, 'briefcase': 1, 'oregon': 1, 'jennie': 1, 'garth': 2, 'amateurs': 1, 'silver': 1, 'haim': 3, 'gero': 1, 'bancroft': 2, 'investigation': 3, 'gosselar': 2, 'roxann': 2, 'browne': 1, 'unhappy': 2, 'sexual': 2, 'fischer': 
1, 'content': 1, 'kentis': 1, 'harvest': 3, 'waitt': 1, 'mathilde': 1, 'bittner': 1, 'self': 3, 'saffa': 1, 'fever': 3, 'head': 9, 'plan': 1, 'anticipated': 1, 'jeri': 3, 'sweetheart': 1, 'gasaway': 1, 'cattle': 1, 'marc': 2, 'rocco': 2, 'biel': 2, 'townsend': 3, 'criminal': 1, 'mastermind': 1, 'sherry': 1, 'stringfield': 1, 'dual': 2, 'identities': 1, 'boaz': 1, 'yakin': 1, 'appeal': 1, 'jeffery': 1, 'friedman': 1, 'fickman': 1, 'maddocks': 1, 'freiburger': 1, 'surgery': 1, 'shalmar': 1, 'joke': 3, 'kimberly': 1, 'mcculough': 1, 'merit': 1, 'badge': 1, 'ari': 1, 'taub': 1, 'robins': 1, 'shakes': 1, 'jennings': 1, 'shoehorned': 1, 'subplot': 1, 'hitchhikers': 1, 'guide': 1, 'toys': 3, 'chance': 1, 'dissociative': 1, 'disorder': 1, 'jfk': 2, 'gomez': 1, 'bleckner': 1, 'fighter': 1, 'jarmusch': 1, 'krista': 2, 'morrit': 2, 'fuest': 1, 'julian': 3, 'kemp': 1, 'files': 1, 'hoover': 1, 'levar': 3, 'blackmail': 1, 'donuts': 2, 'carrot': 1, 'keri': 1, 'maid': 1, 'cole': 1, 'scar': 2, 'alison': 1, 'raimund': 1, 'huber': 1, 'shunji': 1, 'iwai': 1, 'transylvania': 1, 'micheal': 1, 'lester': 2, 'calista': 1, 'flockhart': 1, 'mccrudden': 1, 'haggerty': 4, 'lucci': 1, 'turteltaub': 1, 'lozano': 1, 'powell': 2, 'whitworth': 2, 'jan': 4, 'rachman': 1, 'crimes': 4, 'velcrow': 1, 'ripper': 1, 'garrett': 2, 'wang': 2, 'johan': 1, 'grimonprez': 1, 'peopled': 1, 'laughed': 1, 'gian': 2, 'keth': 1, 'szarabajka': 1, 'wolochatiuk': 1, 'huntington': 2, 'seiji': 1, 'chiba': 1, 'stolz': 3, 'comical': 1, 'nibbelink': 1, 'technology': 1, 'delaney': 2, 'manger': 1, 'aidan': 4, 'quinn': 4, 'maurice': 4, 'chair': 1, 'mellisa': 3, 'hart': 3, 'carrere': 2, 'polson': 1, 'josetxo': 1, 'mateo': 1, 'bell': 3, 'ferland': 1, 'bassett': 3, 'dorn': 2, 'storage': 1, 'felicias': 1, 'fusion': 1, 'trip': 2, 'fro': 1, 'tanya': 4, 'cyborg': 1, 'ileana': 2, 'winkler': 2, 'tiffani': 3, 'thiessen': 3, 'byington': 1, 'loss': 1, 'mitchum': 1, 'takashi': 1, 'ishii': 1, 'reconnaissance': 1, 'cafiero': 1, 'broken': 2, 
'engagement': 1, 'hagman': 1, 'sugg': 1, 'rowlands': 1, 'herbert': 2, 'coleman': 1, 'gerard': 2, 'depardieu': 2, 'pushed': 1, 'lew': 1, 'attempted': 2, 'milestone': 1, 'relating': 1, 'gino': 1, 'nichele': 1, 'natascha': 1, 'mcelhone': 1, 'jeffrey': 1, 'katzenberg': 1, 'kathie': 1, 'gifford': 1, 'kriv': 1, 'stenders': 1, 'burgi': 1, 'midlife': 1, 'crisis': 1, 'geno': 1, 'mcgahee': 1, 'zach': 1, 'hofmeyr': 1, 'brock': 3, 'basil': 1, 'cloke': 1, 'minutes': 1, 'graffiti': 1, 'tennis': 1, 'pursuit': 2, 'sortie': 1, 'des': 1, 'ateliers': 1, 'vibert': 1, 'bride': 3, 'toms': 1, 'midnight': 1, 'garden': 1, 'valmont': 1, 'melman': 1, 'shanley': 1, 'strike': 2, 'schlesinger': 1, 'lambert': 4, 'lon': 1, 'chaney': 1, 'attempt': 3, 'orphans': 1, 'wrye': 2, 'fernando': 1, 'colunga': 1, 'sher': 1, 'anna': 2, 'galvin': 1, 'einstein': 2, 'incorporate': 1, 'shapeshifting': 3, 'ryans': 1, 'rupert': 3, 'everet': 3, 'poet': 2, 'irish': 2, 'friedkin': 1, 'hamburg': 1, 'winger': 2, 'lebrock': 2, 'margret': 1, 'places': 3, 'basic': 1, 'instinct': 1, 'strong': 1, 'gornick': 2, 'melodramas': 2, 'several': 12, 'jeffs': 1, 'kieran': 1, 'carney': 2, 'twenty': 1, 'reprieve': 1, 'pale': 1, 'gates': 3, 'mcfadden': 2, 'central': 2, 'believers': 1, 'dangerous': 2, 'saturn': 1, 'territories': 1, 'cooler': 1, 'rimshop': 1, 'delbert': 1, 'lawless': 1, 'listed': 1, 'dishonor': 2, 'receiving': 2, 'kazuaki': 1, 'kiriya': 1, 'skill': 1, 'stevens': 2, 'infidelity': 1, 'fraunces': 1, 'premise': 2, 'exciting': 1, 'maniacts': 1, 'chuang': 1, 'mellinda': 2, 'mention': 1, 'haussman': 1, 'michel': 2, 'orion': 1, 'suede': 1, 'ide': 1, 'shootout': 1, 'brokedown': 1, 'palace': 1, 'fiddler': 1, 'roof': 1, 'sarahs': 1, 'choice': 1, 'maria': 3, 'bello': 4, 'favorites': 1, 'pinson': 3, 'cukor': 1, 'woodward': 1, 'debuted': 1, 'frankel': 1, 'osteen': 1, 'lachman': 1, 'heroes': 2, 'crash': 2, 'polito': 1, 'addresses': 1, 'consequences': 2, 'deforestation': 1, 'skirmishes': 1, 'border': 2, 'reiser': 2, 'meehl': 1, 
'womens': 1, 'der': 1, 'beek': 1, 'ultimately': 1, 'ikea': 1, 'rise': 1, 'byrd': 1, 'mcdonald': 1, 'archer': 1, 'luner': 2, 'disfigured': 1, 'supper': 1, 'doomsday': 1, 'device': 1, 'directory': 1, 'bam': 1, 'margera': 1, 'global': 3, 'climate': 1, 'issue': 1, 'boase': 1, 'madsen': 2, 'gains': 1, 'guilietta': 1, 'masina': 1, 'haley': 2, 'flatland': 1, 'okay': 1, 'cky': 1, 'k': 1, 'enjoyed': 1, 'retirement': 3, 'sullivan': 1, 'kirby': 1, 'dick': 1, 'example': 1, 'security': 2, 'kell': 1, 'evan': 1, 'detten': 1, 'willard': 2, 'ivan': 2, 'mitov': 1, 'melinda': 1, 'clarke': 1, 'pregnancy': 2, 'jo': 2, 'chandler': 2, 'wilcox': 1, 'ric': 1, 'monte': 1, 'snatching': 1, 'salley': 1, 'dennehy': 3, 'kari': 2, 'wuhrer': 2, 'mccormack': 1, 'marcos': 1, 'efron': 1, 'fina': 1, 'torres': 1, 'duhame': 1, 'jac': 1, 'schaeffer': 1, 'kinney': 1, 'loverboy': 1, 'mnemonic': 1, 'soapdish': 1, 'bradley': 1, 'stockholm': 1, 'syndrome': 1, 'summarize': 2, 'serpent': 1, 'keegan': 1, 'raising': 1, 'arizona': 1, 'panorama': 1, 'hardware': 1, 'cover': 1, 'zerophilia': 1, 'acceptance': 1, 'exit': 1, 'milla': 1, 'jovovich': 1, 'rafelson': 1, 'oh': 1, 'thunderbolt': 1, 'lightfoot': 1, 'sweatshop': 1, 'elektra': 1, 'actions': 1, 'phedon': 1, 'papamichael': 1, 'cess': 1, 'silvera': 1, 'terrorist': 3, 'travelers': 1, 'boondock': 1, 'saints': 1, 'corky': 1, 'romano': 1, 'bunraku': 1, 'jay': 3, 'duplass': 1, 'calvert': 1, 'rigg': 1, 'hybrid': 1, 'unsettling': 1, 'alaimo': 1, 'sackheim': 2, 'cheetah': 1, 'gonzalez': 1, 'paxton': 1, 'tatopoulos': 1, 'killed': 5, 'laughs': 1, 'fugitives': 2, 'lamont': 1, 'starman': 1, 'offered': 7, 'such': 3, 'damato': 2, 'promises': 2, 'assayas': 1, 'shou': 1, 'wanted': 2, 'wanting': 4, 'lasted': 1, 'martyn': 1, 'pick': 1, 'alberta': 1, 'company': 2, 'cassandra': 2, 'alyssa': 2, 'moy': 1, 'job': 5, 'claudia': 3, 'debrah': 1, 'farentino': 1, 'donnie': 1, 'walberg': 1, 'ridgemont': 1, 'geena': 1, 'geri': 2, 'halliwell': 2, 'poole': 1, 'chen': 1, 'madeline': 1, 'stowe': 1, 
'follows': 1, 'shanks': 3, 'valen': 1, 'boer': 1, 'instrumental': 1, 'appearing': 1, 'stacey': 1, 'ugly': 1, 'guard': 1, 'fridel': 1, 'oconnor': 4, 'sitch': 2, 'cambodia': 2, 'blaine': 1, 'patton': 2, 'budreau': 1, 'pistol': 1, 'whip': 1, 'leah': 1, 'sturgis': 1, 'lou': 2, 'sims': 1, 'friz': 1, 'freleng': 1, 'outcast': 1, 'food': 2, 'approved': 1, 'meerkat': 1, 'ri': 1, 'chard': 1, 'though': 1, 'kinji': 1, 'fukasaku': 1, 'morrison': 1, 'dominique': 1, 'milano': 2, 'parise': 1, 'corporate': 2, 'ernst': 1, 'gossner': 1, 'doran': 1, 'dale': 3, 'steffanino': 1, 'barnz': 1, 'doucette': 1, 'largely': 1, 'ulmer': 1, 'florian': 1, 'henckel': 1, 'donnersmarck': 1, 'averages': 1, 'murdered': 1, 'marek': 1, 'losey': 1, 'solitary': 2, 'confinement': 2, 'clancy': 1, 'roos': 1, 'germany': 1, 'alphonso': 2, 'tze': 1, 'chun': 1, 'deemed': 3, 'larabe': 2, 'kolton': 1, 'focused': 3, 'kilner': 1, 'norda': 1, 'aronoff': 1, 'seller': 1, 'nia': 1, 'vardalos': 1, 'technically': 1, 'prendergast': 1, 'mazin': 1, 'holechek': 1, 'anjelica': 1, 'russ': 1, 'todd': 4, 'verow': 1, 'alcohol': 4, 'fairuza': 1, 'balk': 1, 'kristine': 1, 'mendes': 1, 'huck': 1, 'botko': 1, 'jalmari': 1, 'helander': 1, 'kevan': 1, 'rage': 2, 'kostas': 1, 'karagiannis': 1, 'genocide': 1, 'nair': 1, 'gurland': 1, 'mohr': 1, 'lance': 3, 'weiler': 1, 'pepper': 2, 'argott': 1, 'standoff': 3, 'toni': 1, 'harman': 1, 'liza': 2, 'int': 2, 'decerchio': 1, 'anita': 1, 'laselva': 1, 'vehicle': 1, 'demme': 1, 'psychopath': 2, 'northam': 1, 'isacsson': 1, 'pays': 1, 'melski': 1, 'accidental': 1, 'alberto': 1, 'cavalcanti': 1, 'embezzlement': 2, 'menell': 1, 'hypocrisy': 1, 'deblois': 1, 'reflection': 1, 'levien': 1, 'pascal': 1, 'franchot': 1, 'mcconaughey': 2, 'fiennes': 1, 'smits': 1, 'patty': 2, 'jenkins': 3, 'scribner': 1, 'revolution': 1, 'bangalter': 1, 'rowell': 1, 'warner': 1, 'spiner': 1, 'till': 2, 'barker': 1, 'axelgaard': 1, 'teddy': 4, 'tylo': 1, 'christensen': 1, 'oliveira': 1, 'ruben': 1, 'preuss': 1, 'brownrigg': 
1, 'ritter': 1, 'betz': 1, 'solunga': 2, 'despair': 1, 'sangiuliano': 1, 'natasha': 3, 'pavlovich': 2, 'benard': 3, 'annakin': 1, 'aldas': 1, 'reiners': 1, 'myles': 2, 'fergusons': 2, 'mccarthys': 1, 'biographys': 1, 'thora': 1, 'birch': 2, 'choices': 1, 'hank': 1, 'braxtan': 1, 'jerami': 1, 'asquith': 1, 'gariazzo': 1, 'shyu': 1, 'greenfield': 1, 'whale': 1, 'facts': 1, 'malloy': 1, 'lissa': 1, 'rinna': 1, 'thief': 2, 'henstridge': 1, 'wainwright': 1, 'gabby': 1, 'peters': 1, 'tuukka': 1, 'tiensuu': 1, 'matthau': 1, 'chu': 1, 'amusing': 1, 'annoying': 1, 'roommate': 1, 'simple': 1, 'platt': 1, 'cybill': 1, 'tea': 2, 'leoni': 2, 'afterlife': 1, 'portillo': 2, 'speers': 1, 'binder': 1, 'kent': 1, 'jared': 2, 'leto': 2, 'stern': 2, 'wolfinger': 1, 'sadofsky': 1, 'axel': 1, 'rebecca': 4, 'cammisa': 1, 'sophia': 1, 'loren': 1, 'river': 4, 'doremus': 2, 'rosanna': 2, 'munro': 1, 'blutman': 1, 'desi': 1, 'arnaz': 1, 'cheech': 1, 'marin': 1, 'engstrom': 1, 'tomas': 1, 'sandquist': 1, 'baird': 1, 'wiley': 1, 'fishman': 1, 'classified': 1, 'dover': 1, 'koshashvili': 1, 'henriksen': 1, 'kiersch': 1, 'humber': 1, 'balderstone': 1, 'placid': 1, 'monkey': 1, 'presidents': 1, 'sleepover': 1, 'freebie': 1, 'capote': 1, 'elf': 1, 'hustler': 1, 'pasdar': 1, 'denk': 1, 'motor': 1, 'hurley': 1, 'rentzel': 1, 'thueson': 1, 'squeakquel': 1, 'boogeyman': 1, 'turin': 1, 'webber': 1, 'orgy': 1, 'becoming': 1, 'darklands': 1, 'def': 1, 'diggers': 1, 'dysfunktional': 2, 'electroma': 1, 'gung': 1, 'ho': 1, 'carlsons': 1, 'makin': 1, 'corpses': 1, 'confusing': 1, 'internal': 1, 'jakes': 1, 'corner': 1, 'luster': 1, 'brandos': 1, 'bones': 2, 'blueberry': 1, 'lai': 2, 'prefontaine': 1, 'niros': 1, 'satans': 2, 'helper': 2, 'simpatico': 1, 'figures': 1, 'slingshot': 1, 'someones': 1, 'knocking': 1, 'splintered': 1, 'hawks': 1, 'hounds': 1, 'adele': 1, 'sec': 1, 'grey': 1, 'hire': 1, 'ambush': 1, 'others': 2, 'signal': 1, 'trading': 1, 'trick': 1, 'twin': 1, 'peaks': 1, 'unrivaled': 1, 'wisegal': 
1, 'zenon': 1, 'zequel': 1, 'molina': 1, 'maggi': 1, 'sykes': 1, 'tiffany': 1, 'kilbourne': 1, 'raged': 1, 'katsuhiro': 1, 'ohtomo': 1, 'gibney': 1, 'duffy': 1, 'freudenthal': 1, 'concerns': 1, 'barnick': 1, 'ving': 1, 'rhames': 1, 'monika': 1, 'treut': 1, 'injustice': 1, 'crowder': 1, 'february': 1, 'christophe': 1, 'gans': 1, 'conway': 1, 'jessy': 1, 'terrero': 1, 'organized': 2, 'troupe': 1, 'description': 1, 'carvey': 1, 'scolari': 1, 'abilities': 1, 'lien': 1, 'bobbie': 1, 'ellis': 1, 'brutality': 1, 'gayheart': 2, 'boogie': 1, 'woogie': 1, 'laughing': 1, 'twisted': 2, 'tasers': 1, 'isaacs': 1, 'lorentzon': 1, 'darlene': 1, 'vogel': 1, 'rene': 3, 'russo': 4, 'vaungh': 1, 'heady': 2, 'anat': 1, 'seftel': 1, 'polish': 1, 'williamson': 1, 'abuse': 1, 'felitta': 1, 'callahan': 3, 'mining': 3, 'minkoff': 1, 'wincat': 1, 'alcala': 1, 'bellware': 1, 'lieutenant': 1, 'chelsom': 1, 'dom': 1, 'rotheroe': 1, 'champions': 1, 'gale': 1, 'harding': 1, 'regarding': 2, 'hacker': 1, 'mikey': 1, 'hilb': 1, 'strock': 1, 'posey': 1, 'ferguson': 1, 'altieri': 1, 'langer': 1, 'burny': 1, 'mattinson': 1, 'delta': 2, 'burke': 3, 'rowan': 1, 'bednarski': 1, 'stephens': 1, 'cassidy': 3, 'jewish': 2, 'paudge': 1, 'behan': 1, 'yudis': 2, 'jorge': 1, 'solis': 1, 'sweeney': 1, 'sorbo': 1, 'foot': 1, 'wayons': 2, 'stellan': 2, 'olsson': 2, 'cerasolis': 1, 'damian': 1, 'roland': 1, 'emmerich': 1, 'bo': 1, 'zenga': 1, 'dogma': 1, 'headless': 1, 'horseman': 1, 'darwell': 1, 'longis': 1, 'homo': 1, 'erectus': 1, 'rollercoaster': 1, 'mouseketeers': 1, 'breakin': 1, 'everett': 1, 'harmon': 1, 'overton': 1, 'courtship': 1, 'eddies': 1, 'basis': 1, 'believer': 1, 'adventurer': 1, 'narrated': 1, 'monkeybone': 1, 'edge': 1, 'annapolis': 1, 'blackboard': 1, 'teds': 1, 'bullet': 1, 'cherrybomb': 1, 'americas': 1, 'cyber': 1, 'threat': 1, 'divine': 1, 'ya': 2, 'sisterhood': 1, 'dumping': 2, 'gekijouban': 1, 'stay': 1, 'unlimited': 1, 'works': 1, 'gymkata': 1, 'normal': 1, 'newsmakers': 1, 'majestys': 1, 
'service': 1, 'silicon': 1, 'valley': 3, 'rancid': 1, 'rockaway': 1, 'sanctimony': 1, 'taboo': 1, 'botany': 1, 'confidant': 1, 'tremors': 1, 'triangle': 2, 'waking': 1, 'wilde': 1, 'marina': 1, 'gavrilova': 1, 'exotic': 1, 'mask': 1, 'trikonis': 1, 'cream': 1, 'coscarellis': 1, 'tapping': 2, 'bairstow': 1, 'garfield': 1, 'gabrielle': 1, 'anwar': 1, 'barely': 1, 'preparatory': 1, 'goldblum': 2, 'rater': 1, 'loop': 1, 'ally': 1, 'beverley': 1, 'mitchell': 2, 'schatzberg': 1, 'ingo': 1, 'rademacher': 1, 'darrell': 1, 'mapson': 1, 'jumbo': 1, 'roadkill': 1, 'bonifacio': 1, 'frakes': 1, 'cocktail': 1, 'assemblage': 1, 'sphere': 1, 'koehler': 1, 'zuccon': 1, 'quadrophenia': 1, 'tides': 4, 'fit': 1, 'uninvited': 1, 'unloved': 1, 'camping': 1, 'dusk': 1, 'destiny': 1, 'hedge': 1, 'spinout': 1, 'roscoe': 1, 'penance': 1, 'metamorphosis': 1, 'tigerland': 1, 'bongwater': 1, 'cow': 1, 'target': 2, 'patti': 1, 'labelle': 1, 'sargent': 1, 'kopple': 1, 'segal': 2, 'resnikoff': 1, 'abell': 1, 'storke': 1, 'tells': 2, 'locklear': 1, 'fairy': 1, 'far': 1, 'elliotte': 1, 'bankruptcy': 1, 'hamid': 1, 'gurkha': 1, 'grief': 1, 'wim': 1, 'wenders': 1, 'dannelly': 1, 'rory': 1, 'mchenry': 1, 'bangs': 1, 'grossman': 1, 'heisler': 1, 'egleson': 1, 'mandelbaum': 1, 'fields': 1, 'zieff': 1, 'demoniacs': 1, 'tile': 1, 'simcha': 1, 'jacobovici': 1, 'create': 2, 'fellinis': 1, 'zeitgeist': 1, 'griffin': 1, 'reflections': 1, 'groomsmen': 1, 'radiation': 1, 'fated': 2, 'hurst': 1, 'judging': 1, 'digital': 1, 'evolution': 2, 'houchins': 1, 'witchcraft': 1, 'renoire': 1, 'settings': 1, 'spader': 1, 'naked': 2, 'lighter': 1, 'lower': 2, 'fahey': 1, 'salli': 1, 'reputations': 1, 'viewing': 1, 'jann': 1, 'butch': 1, 'saul': 1, 'dibb': 1, 'lockheart': 1, 'switzer': 1, 'mccallum': 1, 'gondry': 1, 'hartfield': 1, 'qualifies': 7, 'pitof': 1, 'fairchild': 1, 'stouffer': 1, 'dani': 1, 'menkin': 1, 'colleges': 1, 'bouzaglo': 1, 'sheridan': 1, 'hollis': 1, 'chamberlain': 1, 'lacey': 1, 'chabert': 1, 'thaw': 1, 
'brady': 1, 'barrett': 1, 'iren': 1, 'koster': 1, 'pytka': 1, 'ciarn': 1, 'hinds': 1, 'shorts': 1, 'ipson': 1, 'donal': 1, 'mosher': 1, 'batchelor': 1, 'courtois': 1, 'galland': 1, 'brings': 1, 'forth': 1, 'totally': 1, 'paint': 1, 'wagon': 1, 'leitch': 1, 'colleen': 1, 'dewhurst': 1, 'rosenberg': 1, 'kosick': 1, 'tillman': 1, 'valentine': 1, 'pelka': 1, 'coneybeare': 1, 'hillcoat': 1, 'carl': 1, '964': 2, 'pinocchio': 2, 'neighbours': 1, 'cages': 1, 'rudo': 1, 'y': 1, 'cursi': 1, 'shrink': 2, 'musician': 1, 'unwed': 1, 'gunslinger': 1, 'alderton': 1, 'chih': 1, 'leong': 1, 'hallucination': 1, 'signorelli': 1, 'lowe': 1, 'cypher': 1, 'cipolla': 1, 'cicely': 1, 'dammes': 1, 'atwell': 1, 'passenger': 1, '57': 1, 'wad': 1, 'coal': 1, 'rufus': 1, 'sewell': 1, 'rosas': 1, 'terence': 1, 'daw': 1, 'crasher': 1, 'bend': 1, 'cheyenne': 1, 'cruel': 1, 'necessary': 1, 'creation': 1, 'dressed': 1, 'straight': 1, 'magoo': 1, 'magnificent': 2, 'sleep': 2, 'drunks': 1, 'boxleitner': 1, 'whatever': 1, 'wire': 1, 'brotherhood': 1, 'tapes': 1, 'grimble': 1, 'bender': 1, 'enjoyable': 1, 'trandem': 1, 'mate': 1, 'gullivers': 2, 'travels': 2, 'weights': 1, 'kinsey': 1, 'joffe': 1, 'necromentia': 1, 'overnight': 1, 'delivery': 1, 'pounds': 1, 'subtle': 1, 'seduction': 1, 'saved': 1, 'pain': 1, 'proudly': 1, 'hail': 2, 'whaledreamers': 1, 'chucky': 1, 'fruit': 1, 'metropia': 1, 'judgment': 2, 'walls': 1, 'cinema': 2, 'afraid': 1, 'senior': 1, 'gamers': 1, 'dorkness': 1, 'rising': 2, 'billion': 1, 'scooby': 2, 'doo': 3, 'abracadabra': 1, 'heredity': 1, 'valdemar': 1, 'bittersweet': 1, 'backyards': 1, 'fierlinger': 1, 'vadim': 1, 'sheng': 1, 'ding': 1, 'gummo': 1, 'grizzly': 1, 'loch': 1, 'ness': 1, 'lawn': 1, 'principal': 1, 'purchase': 5, 'online': 2, 'sandor': 1, 'rank': 2, 'among': 2, 'nicola': 1, 'bounce': 1, 'showing': 1, 'sunday': 1, 'dagger': 1, 'okuribito': 1, 'ran': 1, 'rear': 2, 'graveyard': 1, 'shift': 1, 'hours': 1, 'pepin': 1, 'markowitz': 1, 'storytelling': 1, 'lovelace': 1, 
'conrad': 1, 'rooks': 1, 'centering': 1, 'hutchison': 1, 'palance': 1, 'cutler': 1, 'stacy': 1, 'zinn': 1, 'bakshi': 1, 'dealers': 1, 'fleming': 1, 'aquarium': 1, 'sabato': 1, 'berkelys': 1, 'deniros': 1, 'mcnee': 1, 'joness': 1, 'diary': 1, 'wolf': 1, 'assassins': 1, 'merry': 1, 'bridge': 2, 'camouflaged': 1, 'blackie': 1, 'wheel': 1, 'candyman': 1, 'cleo': 1, 'pimpernel': 1, 'cello': 1, 'aykroyd': 1, 'unplugged': 1, 'ufos': 1, 'bone': 2, 'hat': 1, 'chamber': 1, 'hey': 2, 'esther': 1, 'blueburger': 1, 'player': 1, 'abby': 1, 'kick': 1, 'ass': 1, 'laputa': 1, 'lets': 2, 'lornas': 1, 'neshoba': 1, 'northfork': 1, 'orange': 1, 'county': 1, 'perestroika': 1, 'primal': 1, 'cut': 1, 'dirt': 1, 'seraphim': 1, 'falls': 1, 'silkwood': 1, 'slam': 1, 'bang': 1, 'splatter': 1, 'disco': 1, 'blvd': 1, 'sweet': 1, 'sweetbacks': 1, 'baadasssss': 1, 'bells': 1, 'ipcress': 1, 'file': 1, '1900': 1, 'knew': 1, 'bees': 1, 'sweetest': 1, 'telling': 1, 'unknown': 1, 'wanderers': 1, 'wicker': 1, 'thunderpants': 1, 'mini': 1, 'midkiff': 1, 'environmentalism': 1, 'gallipoli': 1, 'nineteen': 1, 'eighty': 1, 'bricktown': 1, 'flypaper': 1, 'longshots': 1, 'wolves': 1, 'buck': 1, 'expedition': 1, 'invaders': 1, 'tortured': 1, 'irishman': 1, 'zoom': 1, 'cu': 1, 'mama': 1, 'rugrats': 1, 'sitting': 1, 'waterfall': 1, 'almost': 1, 'meth': 1, 'legion': 1, 'macbeth': 1, 'masters': 1, 'universe': 1, 'terrifying': 1, 'girlfriends': 1, 'lunch': 1, 'babes': 1, 'thon': 1, 'tenshi': 1, 'tamago': 1, 'lovely': 2, 'winters': 1, 'curfew': 1, 'disclosure': 1, 'salvation': 1, 'chips': 1, 'lifetime': 1, 'cosmos': 1, 'ace': 1, 'hole': 1, 'candle': 1, 'wine': 1, 'cemetery': 1, 'junction': 1, 'chains': 1, 'worry': 1, 'italian': 1, 'fubar': 1, 'gargoyles': 1, 'conquering': 1, 'changi': 1, 'keeping': 1, 'mum': 1, 'alphabet': 1, 'charge': 1, 'feather': 1, 'souled': 1, 'watermelon': 1, 'chicken': 1, 'paranormal': 1, 'penomena': 1, 'relic': 1, 'matilda': 1, 'miles': 1, 'profondo': 1, 'rosso': 1, 'smash': 1, 'camera': 1, 
'grinch': 1, 'batteries': 1, 'wives': 1, 'lefay': 1, 'crossover': 1, 'wax': 1, 'passage': 1, 'marseille': 1, 'jawbreaker': 1, 'highway': 1, 'runnery': 1, 'eagle': 1, 'easel': 1, 'robocop': 1, 'gear': 1, 'solid': 1, 'orchid': 1, 'vicious': 1, 'priest': 1, 'cedric': 2, 'gibbons': 1, 'apartment': 1, 'sorted': 1, 'hutton': 1, 'hardwicke': 1, 'traci': 1, 'bingham': 1, 'movei': 1, 'movement': 2, 'fictional': 1, 'espionage': 1, 'fuminori': 1, 'kizaki': 1, 'trevor': 1, 'saloon': 1, 'thrown': 1, 'situations': 1, 'crossed': 1, 'axe': 1, 'horde': 1, 'withing': 1, 'geraint': 1, 'wyne': 1, 'davies': 1, 'website': 5, 'detailed': 2, 'opinions': 1, 'common': 1, 'already': 1, 'users': 1, 'evita': 5, 'comments': 1, 'boring': 1, 'sentiment': 1, 'site': 1, 'bit': 1, 'awesome': 1, 'glimpse': 1, 'clip': 1, 'extended': 1, 'highlights': 1, 'spoilers': 2, 'snippets': 1, 'cinemas': 1, '97': 2, 'uhhh': 1, 'darth': 6, 'vader': 6, 'chapter': 1, 'muppet': 1, 'isn': 1, 'potato': 8, 'favourite': 1, 'probably': 1, 'quiet': 1, 'front': 1, 'sunrise': 1, 'intolerance': 1, 'schindler': 1, 'north': 1, 'northwest': 1, 'hur': 1, 'mystic': 1, 'paradisio': 1, 'sierra': 1, 'madre': 1, 'heat': 3, 'connection': 1, 'breathless': 1, 'avventura': 1, 'saturday': 1, 'battleship': 1, 'potemkin': 1, 'un': 1, 'chien': 1, 'andalou': 1, 'strawberries': 1, 'ugestu': 1, 'monogatari': 1, 'ambersons': 1, 'zhivago': 1, 'exorcist': 1, 'towers': 1, 'dictator': 1, 'deadly': 1, 'darling': 1, 'clementine': 1, 'yojimbo': 1, 'cabinet': 1, 'caligari': 1, 'discreet': 1, 'charm': 1, 'bourgeoisie': 1, 'peeping': 1, 'marienbad': 1, 'noon': 1, 'wages': 1, 'splendor': 1, 'grass': 1, 'fargo': 1, 'ikiru': 1, 'singin': 2, 'aguirre': 1, ',': 1, 'wrath': 2, 'aparajito': 1, 'philadelphia': 1, 'manchurian': 1, 'candidate': 1, 'afternoon': 1, 'diabolique': 1, 'traffic': 1, 'cries': 1, 'whispers': 1, 'closely': 1, 'trains': 1, 'strangelove': 1, 'maltese': 1, 'falcon': 1, 'chinatown': 1, 'rules': 1, 'scrooge': 1, 'eraserhead': 1, 'apu': 1, 
'grapes': 1, 'rosemary': 1, 'das': 1, 'boot': 1, 'stagecoach': 1, 'yankee': 1, 'doodle': 1, 'dandy': 1, 'kwai': 1, 'asphalt': 1, 'stranger': 3, 'alicein': 1, 'hara': 1}\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "d7pIJmZCgRMA"
+ },
+ "source": [
+ "### 1.2.4 Cumulative token frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 529
+ },
+ "id": "1mi4Vw5ABFXg",
+ "outputId": "0bb72afe-75e2-4f9c-d64c-cc8694710a71"
+ },
+ "source": [
+ "# Plot the cumulative distribution of token frequency\n",
+ "def cumulative_token_frequency(series, limit=20):\n",
+ "    '''\n",
+ "    Plot, for each count threshold k in 0..limit-1, the percentage of the\n",
+ "    vocabulary (distinct words) that occurs at most k times in `series`.\n",
+ "\n",
+ "    Input:\n",
+ "        series - pd.Series of words\n",
+ "        limit  - number of count thresholds to plot (x-axis spans 0..limit-1)\n",
+ "    Output:\n",
+ "        [plot] - cumulative distribution of token frequency\n",
+ "        Also prints the vocabulary size; nothing is returned.\n",
+ "    '''\n",
+ "    corpus = list(series)\n",
+ "    counter = Counter(corpus)\n",
+ "    vocab_size = len(counter)\n",
+ "    print(\"Vocabulary Size: \", vocab_size)\n",
+ "\n",
+ "    # Every percentage below divides by the vocabulary size, so an empty\n",
+ "    # series has nothing to plot (and would otherwise raise ZeroDivisionError).\n",
+ "    if vocab_size == 0:\n",
+ "        return\n",
+ "\n",
+ "    # words_with_count[c] = number of distinct words that occur exactly c times\n",
+ "    words_with_count = Counter(counter.values())\n",
+ "\n",
+ "    prop_list = []\n",
+ "    tokens_filtered = 0\n",
+ "    for i in range(limit):\n",
+ "        # Running total of words occurring at most i times; a missing key\n",
+ "        # counts as 0, so the threshold i = 0 always contributes 0%.\n",
+ "        tokens_filtered += words_with_count[i]\n",
+ "        prop_list.append(round(tokens_filtered*100/vocab_size, 2))\n",
+ "\n",
+ "    a4_dims = (11.7, 8.27)\n",
+ "    fig, ax = plt.subplots(figsize=a4_dims)\n",
+ "    # The list index doubles as the x value, i.e. the count threshold.\n",
+ "    ax.plot(prop_list)\n",
+ "    ax.grid()\n",
+ "    ax.set_xlabel(\"Counts\")\n",
+ "    ax.set_ylabel(\"Proportion of Vocabulary (%)\")\n",
+ "\n",
+ "cumulative_token_frequency(df[\"Word\"])"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Vocabulary Size: 6710\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KE4VV7X4fUwj"
+ },
+ "source": [
+ "### 1.2.5 Entity Frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 546
+ },
+ "id": "dB_9MvjOAoqq",
+ "outputId": "d29bc77c-a1a2-490c-c03f-154021e36148"
+ },
+ "source": [
+ "tag_counter = plot_top_non_stopwords_barchart(df[\"Tag\"], top=25, word=False)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "There are 25 distinct tags in dataset\n",
+ "{'O': 61008, 'B-ACTOR': 3220, 'I-ACTOR': 3474, 'B-YEAR': 2858, 'B-TITLE': 2376, 'B-GENRE': 4354, 'I-GENRE': 786, 'B-DIRECTOR': 1720, 'I-DIRECTOR': 1850, 'B-SONG': 245, 'I-SONG': 446, 'B-PLOT': 1927, 'I-PLOT': 1687, 'B-REVIEW': 221, 'B-CHARACTER': 385, 'I-CHARACTER': 342, 'B-RATING': 2007, 'B-RATINGS_AVERAGE': 1869, 'I-RATINGS_AVERAGE': 1673, 'I-TITLE': 3495, 'I-RATING': 840, 'B-TRAILER': 113, 'I-TRAILER': 7, 'I-REVIEW': 132, 'I-YEAR': 2456}\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAxkAAAHuCAYAAADz68F3AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzda1xV1b7/8S9qASoQipl/L0jeBS9TpLR0l3jZdsrDzgu67IJluPfJTMlLaextGUmmbqxdnVctL6mnUMutaW0zj5fSklrgBLlkuBQ1y0KJjLaYIvwfeJi1AlxeFuDl83691oM5xphj/OaqB/wc4zeXl6QyAQAAAICH1KntAAAAAABcXUgyAAAAAHgUSQYAAAAAjyLJAAAAAOBRJBkAAAAAPKpebQeAy09+fr4OHjxY22EAAADgMhYcHKwbb7yx0j6SDFRw8OBBRURE1HYYAAAAuIw5HI4q+0gyUEG3ViHKf215bYcBAAAAN2589IHaDqFS1GQAAAAA8CiSDAAAAAAeRZIBAAAAwKNIMq5yzZs319q1a5Wbmyun06kFCxbouuuuq+2wAAAAcBUjybjK/fOf/9TatWvVvn17tW/fXg0bNtTzzz9f22EBAADgKkaScRWLjIzUyZMn9eabb0qSSktLFRcXp4cffli+vr61GxwAAACuWiQZV7HQ0FClpaW5tBUVFenQoUNq27atS3tsbKwcDoccDofq+HrXZJgAAAC4ypBkQJJkt9sVERGhiIgIlRb/UtvhAAAA4ApGknEVy8nJUXh4uEubn5+fWrVqJafTWUtRAQAA4GpHknEV27x5s+rXr68HHjj7S5B16tTR/Pnz9eabb6q4uLiWowMAAMDViiTjKnfvvfdqxIgRys3NVW5urk6ePKkZM2bUdlgAAAC4itWr7QBQvQ4fPqz//M//rO0wAAAAcA1hJwMAAACAR7GTgQoyDuUp4tEHajsMAAAAXKHYyQAAAADgUSQZAAAAADyKJAMAAACAR1GTgQq6tWqh716bV9thXJCbHp1S2yEAAADg/7CTAQAAAMCjSDIAAAAAeBRJBgAAAACPIsnwkJKSEpmmqfT0dKWlpal3795Vjm3btq3Wr18vp9Op1NRUbdmyRX379pUkxcTEKD8/X6ZpWp9OnTopODhYZWVleuyxx6x5/vGPfygmJkaStGTJEu3fv9+KITIy0hq3detW7dmzx5rvnXfeqaZvAQAAAKDw22OKi4tlGIYkadCgQUpMTNSdd95ZYZy3t7c++OADTZkyRevXr5ckhYaGqmfPntq+fbskaeXKlZowYYLLfcHBwfr+++81ceJEvf766zp9+nSFuadOnarVq1frzjvv1BtvvKH27dtbfffdd5/S0tI89bgAAABAldjJqAb+/v4qLCystO++++7Tzp07rQRDkrKzs7V06VK38x49elSbN2+2di+qsnPnTjVv3vzCggYAAAA8hJ0MD/H19ZVpmvLx8VGzZs1cjiv9VmhoqHbt2nXOuUaOHKk+ffpY1789ejVnzhxt2LBBixcvrvL+wYMHa+3atS5tb731loqLiyVJmzZt0rRp01z6Y2NjNW7cOElSHV/fc8YHAAAAnAtJhof89rhUr169tGzZMoWFhbm975///KfatWun3NxcDRs2TFLlx6XK5eXl6fPPP9fo0aMr9M2dO1ezZ89WixYtKtSEuDsuZbfbZbfbJUmnvj/iNm4AAACgKhyXqgYpKSkKCgpSkyZNlJCQYBVcS2ePRvXo0cMaO3ToUI0ZM0aNGjU67/lnz56tJ598Ul5eXi7tU6dOVYcOHfTkk0+ec6cDAAAAqE4kGdWgQ4cOqlu3rgoKChQfHy/DMKxdjrffflu33367hgwZYo2vX7/+Bc3/1VdfKScnx2WO33rllVdUp04dDRo06OIfAgAAALhIHJfykPKaDEny8vJSTEyMSktLK4w7efKk7rnnHv3973/XggUL
9P3336uoqEgJCQnWmN/XZDz66KP69ttvXeZ5/vnnrfUqk5CQoGnTpumjjz6S5FqTcezYMQ0cOPDiHxYAAAA4By9JZbUdBC4vp74/oh9Wv1XbYVyQmx6dUtshAAAAXFMcDociIiIq7eO4FAAAAACP4rgUKsg4dFgR7AwAAADgIrGTAQAAAMCjSDIAAAAAeBRJBgAAAACPoiYDFXRt2UiHXq74i+K1pdXjb9d2CAAAALgA7GQAAAAA8CiSDAAAAAAeRZIBAAAAwKOoyahFRUVF8vPzs67HjBmjiRMnSpI6d+6sr776SmfOnNGHH36oPXv2qGfPnjpy5IhGjBghSerSpYsyMzMlSYsXL1ajRo30888/a/78+S7rlJSUWOMkacWKFZozZ051Px4AAACuUSQZl5E333xTb775piQpLy9P/fr1U0FBgSQpJiZGkjR79mzNnj1b0tkkxTAM6/6ZM2dWOm9xcbHLOAAAAKA6cVwKAAAAgEeRZFwDfH19ZZqm9YmOjq4wJjY2Vg6HQw6HQ3V8/SqZBQAAADg/HJe6BpzPcSm73S673S5J+uW7fTURFgAAAK5S7GRcBhISEqxdBgAAAOBKR5JxGYiPj5dhGBRnAwAA4KpAknGViY+P19dff219pIo1GYmJibUcJQAAAK5mXpLKajsIXF5++W6fvl/119oOw9Lq8bdrOwQAAAD8jsPhUERERKV97GQAAAAA8CjeLoUKdn/9gyLYPQAAAMBFYicDAAAAgEeRZAAAAADwKJIMAAAAAB5FTQYq6NQqQGn/fU9th2EJ/6/3azsEAAAAXAB2MgAAAAB4FEkGAAAAAI8iyQAAAADgUSQZ1aioqKjKvsaNG+vUqVP685//7NLetGlTJScny+l0KjU1VR988IG6du0q0zRlmqYKCgq0f/9+maapTZs2SZI6d+6szZs3a8+ePcrNzVV8fLw1X0xMjPLz82Wapr788ktNmjSpeh4WAAAA+D8kGbVkxIgRSklJkc1mc2lfs2aNtm3bprZt26pnz56aPn26/P39ZRiGDMPQunXrNHXqVBmGoYEDB8rHx0fr1q3TCy+8oI4dO6pbt2667bbb9Oijj1pzrly5UoZh6Pbbb9fTTz+tFi1a1PTjAgAA4BpCklFLbDabJk+erObNm6t58+aSpH79+un06dN6/fXXrXG7d+/Wjh07qpxn9OjR+vTTT61djeLiYj322GN66qmnKoz94Ycf5HQ61axZMw8/DQAAAPArkoxa0KJFCzVr1kwOh0OrVq3SyJEjJUlhYWFKS0u7oLlCQ0Mr3LN//341bNhQfn5+Lu0tW7aUj4+Pdu/eXWGe2NhYORwOORwO1fMNuMAnAgAAAH5FklELRo4cqVWrVkmSVqxYUeHIVHWsl5GRIafTqddee02//PJLhTF2u10RERGKiIhQSfHxao0HAAAAVzeSjBqQkJBgFW5LZ49KjRkzRnl5eVq3bp26du2qtm3bKjs7W+Hh4Rc0d05OToV7QkJC9PPPP1uF5ytXrrRqNV544QU1bdrUMw8GAAAAVIIkowbEx8dbhdvt2rVTw4YN1aJFC4WEhCgkJESJiYmy2WzasmWLvL29FRsba93bpUsX9enTp8q533rrLfXp00f9+/eXJPn4+Ojll1/Wiy++WGFsWlqali9frokTJ3r+IQEAAID/Q5JRw2w2m9asWePStnr1auvI1L333qsBAwbI6XQqKytLiYmJ+u6776qc7+TJk4qKilJ8fLz27NmjzMxMORwOvfLKK5WOnzNnjh566CE1bNjQcw8FAAAA/IaXpLLaDgKXl5+/z9VX/3yitsOwhP/X+7UdAgAAAH7H4XAoIiKi0j52MgAAAAB4VL3aDgCXny8PHVcEuwcAAAC4SOxkAAAAAPAokgwAAAAAHkWSAQAAAMCjqMlABe2DA7TJfletxjAwdkOtrg8AAICLx04GAAAAAI8iyQAAAADgUSQZAAAAADyKJMODSkpKZJqm0tPTlZaWpt69
e1c5tnHjxjp16pT+/Oc/u7Q3bdpUycnJcjqdSk1N1QcffKCuXbvKNE2ZpqmCggLt379fpmlq06ZNkqTOnTtr8+bN2rNnj3JzcxUfH2/NFxMTo/z8fJmmqS+//FKTJk2qnocHAAAA/g9JhgcVFxfLMAx1795d06dPV2JiYpVjR4wYoZSUFNlsNpf2NWvWaNu2bWrbtq169uyp6dOny9/fX4ZhyDAMrVu3TlOnTpVhGBo4cKB8fHy0bt06vfDCC+rYsaO6deum2267TY8++qg158qVK2UYhm6//XY9/fTTatGiRbV9BwAAAABJRjXx9/dXYWFhlf02m02TJ09W8+bN1bx5c0lSv379dPr0ab3++uvWuN27d2vHjh1VzjN69Gh9+umn1q5GcXGxHnvsMT311FMVxv7www9yOp1q1qzZxT4WAAAA4BavsPUgX19fmaYpHx8fNWvWTJGRkZWOa9GihZo1ayaHw6FVq1Zp5MiR+vvf/66wsDClpaVd0JqhoaEV7tm/f78aNmwoPz8/l/aWLVvKx8dHu3fvrjBPbGysxo0bJ0m6zsf/gmIAAAAAfoudDA8qPy7VqVMnDR48WMuWLat03MiRI7Vq1SpJ0ooVKyocmfK0kSNHKiMjQ06nU6+99pp++eWXCmPsdrsiIiIUERGh0yd/qtZ4AAAAcHUjyagmKSkpCgoKUpMmTZSQkGAVbktnj0qNGTNGeXl5Wrdunbp27aq2bdsqOztb4eHhF7ROTk5OhXtCQkL0888/q6ioSNLZmozyWo0XXnhBTZs29cxDAgAAAJUgyagmHTp0UN26dVVQUKD4+HircLtdu3Zq2LChWrRooZCQEIWEhCgxMVE2m01btmyRt7e3YmNjrXm6dOmiPn36VLnOW2+9pT59+qh///6SJB8fH7388st68cUXK4xNS0vT8uXLNXHiRM8/MAAAAPB/SDI8qLwmwzRNrVy5UjExMSotLXUZY7PZtGbNGpe21atXW0em7r33Xg0YMEBOp1NZWVlKTEzUd999V+WaJ0+eVFRUlOLj47Vnzx5lZmbK4XDolVdeqXT8nDlz9NBDD6lhw4aX+LQAAABA5bwkldV2ELi8HM/P1Rfv1e5ux8DYDbW6PgAAAM7N4XAoIiKi0j52MgAAAAB4FK+wRQW5B4+zkwAAAICLxk4GAAAAAI8iyQAAAADgUSQZAAAAADyKmgxUcHNrf72z+I81uuaIhzfW6HoAAACoPuxkAAAAAPAokgwAAAAAHkWSAQAAAMCjSDJqWElJiUzTVHp6utLS0tS7d2+X/kceeUQrVqywrv38/OR0OhUSEqIlS5Zo//79Mk1Tpmnq008/dbl3zZo12rlzp0vbzJkzdfjwYZmmqezsbI0aNar6Hg4AAAAQSUaNKy4ulmEY6t69u6ZPn67ExESX/oULF6ply5bq37+/JGnWrFlavHix8vLyJElTp06VYRgyDEO33367dV9AQIDCw8MVEBCgkJAQlzmTkpJkGIaioqL0+uuvq1496v0BAABQfUgyapG/v78KCwsrtP/lL3/RggULFB4erv79+2vu3Llu5xo6dKjWr1+vFStWVLlb4XQ6deLECQUGBl5y7AAAAEBV+CftGubr6yvTNOXj46NmzZopMjKywpjMzExt3LhRmzdvVlRUlE6fPm31zZ07V/Hx8ZKk7Oxs3X///ZIkm82mWbNm6fvvv9fq1asr7JBIkmEY2rt3r44ePVqhLzY2VuPGjZMkefsEeORZAQAAcG0iyahh5celJKlXr15atmyZwsLCKox79dVXddddd+njjz92aZ86dapWr17t0nbjjTeqXbt22rFjhyTp9OnTCg0NVXZ2tiQpLi5ODz30kNq3b68hQ4ZUGpfdbpfdbpckFRz96tIeEgAAANc0jkvVopSUFAUFBalJkyZKSEiwCrolqbS0VKWlpec1T3R0tAIDA5WXl6e8vDy1bt1aNpvN6k9KSlJYWJiGDRumRYsWydvbu1qeBwAAAJBI
MmpVhw4dVLduXRUUFCg+Pt4q6L5QNptNgwcPVkhIiEJCQhQeHl5pXcb69euVmpqqmJgYT4QPAAAAVIrjUjWsvCZDkry8vBQTE3PeOxaSa02GdHYXIzg4WCkpKVbbgQMHdPz4cd1yyy0V7p81a5befvtt2e12lZWVXcKTAAAAAJXzksRfmnBRcPQrbVn/eI2uOeLhjTW6HgAAAC6Nw+FQREREpX0clwIAAADgURyXQgX7D/zEzgIAAAAuGjsZAAAAADyKJAMAAACAR5FkAAAAAPAoajJQQXBrfy1cOqha13gk5qNqnR8AAAC1h50MAAAAAB5FkgEAAADAo0gyAAAAAHgUSUYtKyoqcrl+5JFHtGLFCuvaz89PTqdTISEhWrJkifbv3y/TNGWapj799FOXe9esWaOdO3e6tM2cOVOHDx+WaZrKzs7WqFGjqu9hAAAAAJFkXHYWLlyoli1bqn///pKkWbNmafHixcrLy5MkTZ06VYZhyDAM3X777dZ9AQEBCg8PV0BAgEJCQlzmTEpKkmEYioqK0uuvv6569aj3BwAAQPUhybgM/eUvf9GCBQsUHh6u/v37a+7cuW7vGTp0qNavX68VK1ZUuVvhdDp14sQJBQYGejpkAAAAwEKScRnKzMzUxo0btXnzZk2YMEGnT5+2+ubOnWsdl/qf//kfq91msyk5OVnJycmy2WyVzmsYhvbu3aujR49W6IuNjZXD4ZDD4ZCPT4DnHwoAAADXDM7NXKZeffVV3XXXXfr4449d2qdOnarVq1e7tN14441q166dduzYIUk6ffq0QkNDlZ2dLUmKi4vTQw89pPbt22vIkCGVrme322W32yVJ+Ue/8vTjAAAA4BrCTsZlIiEhwdqhkKTS0lKVlpae173R0dEKDAxUXl6e8vLy1Lp1a5fdjKSkJIWFhWnYsGFatGiRvL29q+UZAAAAAIkk47IRHx9vFXRfKJvNpsGDByskJEQhISEKDw+vtC5j/fr1Sk1NVUxMjCdCBgAAACpFknGF+W1NhmmaateunYKDg5WSkmKNOXDggI4fP65bbrmlwv2zZs3SE088IS8vr5oMGwAAANcQL0lltR0ELi/5R7/Sun9NqNY1Hon5qFrnBwAAQPVyOByKiIiotI+dDAAAAAAexdulUMHBAz+x0wAAAICLxk4GAAAAAI8iyQAAAADgUSQZAAAAADyKmgxU0CLEX3PfHlSta0wdTc0HAADA1YqdDAAAAAAeRZIBAAAAwKNIMgAAAAB4FDUZNaikpESZmZny8vLSmTNn9Nhjj2nnzp1W/5gxYzRx4kRJUufOnfXVV1/pzJkz+vDDD7Vnzx717NlTR44c0YgRIyRJXbp0UWZmpiRp8eLFatSokX7++WfNnz+/0nXLrVixQnPmzKnuxwUAAMA1iiSjBhUXF8swDEnSoEGDlJiYqDvvvNPqf/PNN/Xmm29KkvLy8tSvXz8VFBRIkmJiYiRJs2fP1uzZsyVJRUVF1nySNHPmTLfrAgAAANWN41K1xN/fX4WFhbUdBgAAAOBx7GTUIF9fX5mmKR8fHzVr1kyRkZE1um65xMRErVq1ymVMbGysxo0bd3a8T0CNxAUAAICrE0lGDfrtsaVevXpp2bJlCgsLq9F1q2K322W32yVJR459Ve0xAQAA4OrFcalakpKSoqCgIDVp0kQJCQkyTdNltwEAAAC4UpFk1JIOHTqobt26KigoUHx8vAzDoDgbAAAAVwWOS9Wg39ZGeHl5KSYmRqWlpR5dIz4+XpMmTbKuW7ZsWaEm48MPP9T06dM9ui4AAABQzktSWW0HgcvLkWNf6X8+mlCta0wd/VG1zg8AAIDq5XA4FBERUWkfx6UAAAAAeBTHpVDB4byf2GkAAADARWMnAwAAAIBHkWQAAAAA8CiSDAAAAAAeRU0GKrixjb8mvvtHj8750vCNHp0PAAAAly92MgAAAAB4FEkGAAAAAI8iyQAAAADgUSQZl6ikpESmaSo9PV1paWnq
3bt3peNmzpypw4cPyzRNZWdna9SoUS79UVFRKisrU4cOHSRJKSkpMk1TBw8eVH5+vkzTlGmaCg4OVl5enho3bixJKisr07x586x5Jk+erJkzZ1rX9913nzIyMpSVlaX09HTZ7XYFBAR4+msAAAAALCQZl6i4uFiGYah79+6aPn26EhMTqxyblJQkwzAUFRWl119/XfXq/Vp3b7PZtH37dtlsNklSr169ZBiG/va3v2nlypUyDEOGYejgwYMuc548eVJDhw61ko7f+uMf/6i4uDjdddddCgsLU48ePfTZZ5+padOmHnp6AAAAoCKSDA/y9/dXYWGh23FOp1MnTpxQYGCgJKlBgwbq06ePxo4dW2GHw52SkhK98cYbiouLq9D39NNPa8qUKfr2228lSaWlpVqyZIlyc3MvaA0AAADgQvAK20vk6+sr0zTl4+OjZs2aKTIy0u09hmFo7969Onr0qKSzR6U+/PBD7d27VwUFBerRo4d27dp13jG8+uqr2r17t1588UWX9tDQ0POeJzY2VuPGjZMkNbye41QAAAC4eOxkXKLy41KdOnXS4MGDtWzZsirHxsXFKSsrS59//rmef/55q91ms2nFihWSpBUrVlhHps5XUVGRli1bpscff7zKMWFhYTJNU06nU9HR0RX67Xa7IiIiFBERoZ9PHb+g9QEAAIDfIsnwoJSUFAUFBalJkyZKSEiwirXLJSUlKSwsTMOGDdOiRYvk7e2twMBARUZGauHChcrLy9PUqVMrTQLcWbBggcaOHasGDRpYbdnZ2erRo4ckKSsrS4ZhaMOGDfL19b30hwUAAACqQJLhQR06dFDdunVVUFCg+Ph4q1j799avX6/U1FTFxMRo+PDhWr58uVq3bq2QkBC1atVKeXl56tu37wWtXVhYqFWrVmns2LFWW2JioubNm6fmzZtbbSQYAAAAqG7UZFyi8poMSfLy8lJMTIxKS0vd3jdr1iy9/fbb+vbbbzVnzhyXvtWrV1tvm7oQ8+fP12OPPWZdb9iwQU2aNNGGDRtUt25d/fjjj8rKytLGjRsvaF4AAADgQnhJKqvtIHB5OfjDV/r7lqrrOy7GS8NJbAAAAK4mDodDERERlfZxXAoAAACAR3FcChXk7/uJnQcAAABcNHYyAAAAAHgUSQYAAAAAjyLJAAAAAOBR1GSgAv+2QRq89mGPzffhnxZ7bC4AAABc/tjJAAAAAOBRJBkAAAAAPIokAwAAAIBHkWTUsJKSEpmmqfT0dKWlpal3796Vjps5c6YOHz4s0zSVmZmpIUOGWO2TJ0+uMD4qKkoZGRnKycnR7t27FRUVJUl65ZVXZJqmsrOzdeLECZmmKdM0NWzYsOp7SAAAAFzTKPyuYcXFxTIMQ5I0aNAgJSYm6s4776x0bFJSkubPn6+OHTtq+/btuvHGGysd17VrV82bN08DBw7UgQMH1Lp1a23atEn79+/XY489JkkKDg7W+++/b60NAAAAVBd2MmqRv7+/CgsL3Y7bs2ePSkpKFBQUVGn/lClTNHv2bB04cECSdODAASUmJmrq1KmeDBcAAAA4L+xk1DBfX1+ZpikfHx81a9ZMkZGRbu+55ZZbVFpaqqNHj1baHxoaqnnz5rm0paamavz48ecdV2xsrMaNGydJCri+4XnfBwAAAPweSUYN++1xqV69emnZsmUKCwurdGxcXJzuv/9+FRUVaeTIkdUal91ul91ulyR9VZhXrWsBAADg6sZxqVqUkpKioKAgNWnSRAkJCVZRdrmkpCQZhqE//OEP2rFjR5Xz5OTkKDw83KUtPDxc2dnZ1RY7AAAAUBWSjFrUoUMH1a1bVwUFBYqPj5dhGBdVmD1v3jxNnz5dwcHBks4Wec+YMUPz58/3dMgAAACAWxyXqmHlNRmS5OXlpZiYGJWWll7QHPHx8Zo0aZJ13bJlSz355JNav369rrvuOp0+fVrTpk1TRkaGR2MHAAAAzoeXpLLaDgKXl68K8zTx4+c8Nt+H
f1rssbkAAABweXA4HIqIiKi0z+1xqZtvvlnXX3+9JOmOO+7QhAkTFBAQ4NkIAQAAAFw13O5kmKapnj17qnXr1vrXv/6l9957T6Ghobr77rtrKETUtHNlpQAAAIB0iTsZpaWlOnPmjO6991794x//0LRp09SsWTOPBwkAAADg6uA2yTh9+rRGjRqlmJgYvf/++5Kk6667rtoDAwAAAHBlcptkPPTQQ+rdu7eef/55HThwQK1bt9by5ctrIjYAAAAAV6ALervUDTfcoJYtWyozM7MaQ0Jty/3hG03c5pk3Qm0Y+lePzAMAAIDLyyXVZGzdulV+fn4KDAzUrl27ZLfb+ZE3AAAAAFVym2QEBASoqKhIQ4cO1bJly9SrVy8NGDCgJmIDAAAAcAVym2TUq1dPN910k6Kjo63CbwAAAACoitskY9asWdq4caOcTqdSU1MVEhKivXv31kRsAAAAAK5AbpOMd999V926ddP48eMlSXl5eRo+fLjbiUtKSmSaptLT05WWlqbevXtXOm7mzJk6fPiwTNNUdna2Ro0a5dIfFRWlsrIydejQQZKUkpIi0zR18OBB5efnyzRNmaap4OBg5eXlqXHjxpKksrIyzZs3z5pn8uTJmjlzpnV93333KSMjQ1lZWUpPT5fdbrd+yfzuu+/Wrl27lJ6eruzsbI0bN87t85qmqeTkZEmSr6+vjh07Jj8/P5cxa9asUXR0tGJiYlxiN01TnTp1UnBwsE6cOGF9F0uXLlW9evWs++vWrav8/HwlJia6zNugQQO99tprcjqdSktLU2pqqh555BFJcpmz/PPAAw+4fR4AAADgYtVzN+Cll16q0Hb8+HGlpqZq3bp1Vd5XXFwswzAkSYMGDVJiYqLuvPPOSscmJSVp/vz5atu2rdLS0vTuu++qpKREkmSz2bR9+3bZbDY988wz6tWrlyQpJiZGPXv21IQJEyqd8+TJkxo6dKgSExNVUFDg0vfHP/5RcXFxuuuuu/Ttt9+qTp06iomJUdOmTfXvf/9bb7zxhm655RZ98803uv7669W6detzfkcdO3ZU3bp11bdvX9WvX18nTpzQxo0bde+992rZsmWSJH9/f/Xp00ejR49WdHS0Vq5cWSH24OBg7du3T1EuwrsAACAASURBVIZhqE6dOtq0aZOio6P19ttvS5IGDhyo3NxcjRgxQtOnT7fuW7hwofbv36927dqprKxMQUFBevjhh63+8jkBAACAmuB2J8PHx0fdu3fX3r17tXfvXnXt2lUtWrTQ2LFjlZSUdF6L+Pv7q7Cw0O04p9OpEydOKDAwUNLZf6Hv06ePxo4dW2GHw52SkhK98cYbiouLq9D39NNPa8qUKfr2228lnf1V8yVLlig3N1d+fn6qV6+elZicOnVKubm551zLZrNp+fLl+uijjxQVFSVJSk5Odon53nvv1caNG1VcXHxe8ZeWluqLL75Q8+bNXdZ56aWXdOjQIWtn6Oabb9Ytt9yi+Ph4lZWdfRvxsWPH9OKLL57XOuViY2PlcDjkcDjk713/gu4FAAAAfsvtTkbXrl11++23q7S0VJL03//939q+fbv69Olzzt/L8PX1lWma8vHxUbNmzRQZGek2GMMwtHfvXh09elTS2aNSH374ofbu3auCggL16NFDu3btOt9n06uvvqrdu3dX+IM7NDS0ynkKCwu1bt06HTx4UJs3b9b777+v5ORk6w/4yowcOVIDBw5Ux44dNWHCBCUnJ2vjxo1auHChGjVqpB9++EGjRo3SK6+84nJPnz59rOvfHyfz9vbWrbfeqokTJ1rXAwYM0J///GfdcMMNstls2rlzp0JDQ5WRkXHO+Nq0aSPTNK3rCRMmaMeOHS5j7Ha77Ha7pLO/kwEAAABcLLc7GYGBgWrYsKF13aBBAzVq1EilpaX65Zdfqryv/LhUp06dNHjwYOvYUGXi4uKUlZWlzz//XM8//7zVbrPZtGLFCknSihUrZLPZzuuhyhUVFWnZsmV6
/PHHqxwTFhYm0zTldDoVHR0t6ey/6vfv319ffPGFpkyZosWLq/5huvDwcB07dkxff/21Nm/eLMMwFBgYqNOnT2vdunUaPny4GjduLMMwtHHjRuu+lStXyjAM63Py5ElJvyYE33//vY4cOWIlcvfcc4+2bt2qkydPavXq1frTn/6kOnUq/uebMWOGTNPUN9/8miiUH5cq//w+wQAAAAA8yW2S8eKLLyo9PV2LFy/WkiVLZJqm5s6dq/r16+t///d/z2uRlJQUBQUFqUmTJkpISLAKkMslJSUpLCxMw4YN06JFi+Tt7a3AwEBFRkZq4cKFysvL09SpU60k4EIsWLBAY8eOVYMGDay27Oxs9ejRQ5KUlZUlwzC0YcMG+fr6WmOysrK0YMECDRw4UMOGDatyfpvNpo4dOyovL0/79u2Tv7+/Nb78yNTw4cP13nvvWXUm51KeELRp00bh4eEaMmSItc6AAQOUl5entLQ0NW7cWJGRkcrJyVG3bt3k5eUlSZo9e7YMw5C/v/8Ff1cAAACAJ7hNMhYvXqzbbrtNa9eu1Zo1a9SnTx8tWrRIJ06c0LRp085rkQ4dOqhu3boqKChQfHy89S/qv7d+/XqlpqYqJiZGw4cP1/Lly9W6dWuFhISoVatWysvLU9++fS/oAQsLC7Vq1SqNHTvWaktMTNS8efNc6h3KE4wGDRrojjvusNq7d++ugwcPVjq3l5eXoqOj1aVLF4WEhCgkJERRUVHWjsu2bdvUrl07jR8/3nrz1PkqKCjQU089penTp8vPz099+/ZVq1atrHXGjx8vm82mffv2KTU1VQkJCdbOhre3t5V0AAAAADXNbZIhnX1T05EjR1RYWKi2bdue1x/65TUZpmlq5cqViomJseo6zmXWrFl64oknZLPZtGbNGpe+1atXX/CRKUmaP3++goKCrOsNGzbo5Zdf1oYNG5Sdna1PP/1UZ86c0caNG+Xl5aVp06Zpz549Mk1Tzz77rMaMGVPpvH379tU333yjI0eOWG2ffPKJOnfurJtuukllZWV699131bhxY3388ccu944cOdLltbKVveJ37dq1ql+/vuLi4rRlyxadOnXK6nvvvfc0ZMgQXX/99XrkkUfUuHFjOZ1OORwObdq0ySUBLD+CVf6p6o1cAAAAgCd4Saq6YljS2LFjNXHiRLVo0ULp6enq1auXdu7cqf79+9dQiKhpuT98o4nbqq5DuRAbhv7VI/MAAADg8uJwOBQREVFpn9u3S02cOFERERFKSUlRZGSkOnTooNmzZ3s8SFw+ju/7luQAAAAAF81tknHy5EnrLVLXX3+9vvrqK+vXt68lM2bM0IgRI1za3nnnHRIuAAAA4HfcJhmHDx9WQECA1q5dq02bNqmwsLDKQuir2ezZs0koAAAAgPPgNskYOnSoJOnZZ5/V1q1bFRAQoA0bNlR7YAAAAACuTG6TjGXLlunBBx+UdPbNSb9vw9UnoE0r/cfqf3hkrn8N401WAAAA1xq3r7ANDQ11vaFOHYWHh1dbQAAAAACubFUmGU899ZR++uknde3aVcePH9fx48f1008/KT8/X++9915NxggAAADgClJlkvHCCy/I399fc+fOVUBAgAICAuTv76+goCDNmDGjJmMEAAAAcAVxe1zqWk0oioqKKm2fOXOmDh8+LNM0lZubq9WrV6tTp05W/9atW63jZHl5edq9e7cyMjK0bds2tWrVyhpXUlLi8ivcTz75pCSpXr16SkxMVG5urtLS0vTZZ59p8ODBSklJkWmaOnjwoPLz8637goOD5e/vr6VLl2rv3r1yOp1aunSp/P39JUnBwcE6ceKETNNUdna2li5dqnr13JbiAAAAABfNbZKBipKSkmQYhtq3b6+VK1dqy5YtCgoKqnRsv3791K1bN23btk3x8fFWe3FxsQzDsD5z5syRJD333HNq1qyZwsLCFB4erj/96U/y8/NTr169ZBiG/va3v2nlypXWfQcPHtSiRYu0f/9+
tWvXTm3btlVeXp4WLlxorbVv3z4ZhqEuXbqoRYsWio6Ort4vCAAAANc0koxLtGrVKn300UcaPXr0Ocft3LlTzZs3P+cYX19fxcbGasKECTp16pQkKT8/X++8806V97Rp00bh4eF67rnnrLZZs2apZ8+euvnmm13GlpaW6osvvnAbBwAAAHApzuvcTJ06ddS0aVOXYzZff/11tQV1pdm1a5c6dux4zjGDBw/W2rVrrWtfX1+ZpmldJyYm6ssvv9ShQ4eqPKpVmc6dOys9PV2lpaVWW2lpqdLT0xUaGqrdu3db7d7e3rr11ls1ceLECvPExsZq3LhxkiT/633Oe30AAADg99wmGY899phmzpyp77//3vpDtqysTN26dav24K4UXl5eVfZt3bpVjRo10s8//6y//vWvVnv5canf6tKlS7XE16ZNG5mmqZCQEH3wwQfKzMysMMZut8tut0uScn/4vlriAAAAwLXB7XGpiRMnqkOHDgoLC1PXrl3VtWvXayrBSEhIsIqsq2IYhr788stK+/r166fg4GClp6fr2WefPedaTqdTrVq1kp+f33nHl5OTo+7du7skOl5eXurevbtycnIk/VqTUX60asiQIec9PwAAAHCh3CYZX3/9tY4fP14TsVyW4uPjrSLrygwdOlSDBg1ScnJylXOcOXNGkyZN0oMPPqjAwMAqxxUXF2vRokV66aWXdN1110mSgoKCNHz48Crv2bdvn0zTdCkqj4+P165du7Rv3z6XsQUFBXrqqac0ffr0KucDAAAALpXbJGP//v3atm2bnnrqKcXFxVmfa1lcXJz1Ctv7779fkZGROnbs2Dnv+e6775ScnKzx48dL+rUmo/yTmJgo6WyCcPToUeXk5CgzM1Pvv/++fvrpp3POPXbsWLVv315Op1NOp1Pt27fX2LFjKx27du1a1a9fX3369LmIJwcAAADc85JUdq4Bf/vb3yptnzVrVnXEg8tA7g/fa9LWVR6Z61/DJnhkHgAAAFxeHA6HIiIiKu1zW/hdnkw0aNBAkvTvf//bg6EBAAAAuNq4TTJCQ0O1fPlyNWrUSJJ07NgxPfjgg1ZRMa4+x/cdYgcCAAAAF81tTcYbb7yhJ554Qq1bt1br1q01efJk61WnAAAAAPB7bpOMBg0aaNu2bdb1xx9/bB2dAgAAAIDfO6+3S8XHxys4OFjBwcF6+umntX///pqIDQAAAMAVyG1NxsMPP6xnn31W//znPyVJ27dv18MPP1ztgaH2BLRprbvfXXLJ83ww/CEPRAMAAIArjdsk48cff9TEiRNrIhYAAAAAVwG3SUZ4eLhmzJih1q1bq169X4d369atWgMDAAAAcGVym2S89dZbmjp1qjIzM1VaWloTMQEAAAC4grkt/D569KjWr1+vAwcO6NChQ9bnalNSUiLTNJWenq60tDT17t270nEzZ87U4cOHZZqmcnNztXr1anXq1Mnq37p1q8LDwyVJeXl52r17tzIyMrRt2za1atWqwnrlnyeffFKSVK9ePSUmJio3N1dpaWn67LPPNHjwYKWkpMg0TR08eFD5+fnWfcHBwfL399fSpUu1d+9eOZ1OLV26VP7+/pKk4OBgnThxQqZpKjs7W0uXLnXZkQIAAAA8ze1fmzNnzpTdbtfmzZv1yy+/WO1r1qyp1sBqWnFxsQzDkCQNGjRIiYmJuvPOOysdm5SUpPnz50uSoqOjtWXLFnXp0kXHjh2rMLZfv34qKCjQM888o/j4eI0bN67Cer/13HPPqVmzZgoLC9OpU6d044036o477lCvXr0kSTExMerZs6cmTPj1x/LeeecdZWVlKSYmRpL0zDPPaOHChYqOjpYk7du3T4ZhqE6dOtq0aZOio6P19ttvX+Q3BQAAAJyb2yTjoYceUseOHXXddddZx6XKysquuiTjt/z9/VVYWHheY1etWqW7775bo0eP1ssvv1zluJ07d+rxxx8/51y+vr6KjY1VSEiITp06JUnKz8/XO++8
U+U9bdq0UXh4uEaOHGm1zZo1S06nUzfffLPOnDljtZeWluqLL75Q8+bNz+vZAAAAgIvhNsmIiIhQx44dayKWWuXr6yvTNOXj46NmzZopMjLyvO/dtWuX2+9o8ODBWrt2bYX1yiUmJurLL7/UoUOHVFRUdN5rd+7cWenp6S71MqWlpUpPT1doaKh2795ttXt7e+vWW2+t9G1hsbGx1i6L//U+570+AAAA8Htuk4zPPvtMnTp10pdfflkT8dSa3x5f6tWrl5YtW6awsLDzutfLy6vKvq1bt6pRo0b6+eef9de//rXS9cp16dLlIiJ3r02bNjJNUyEhIfrggw+UmZlZYYzdbpfdbpck5f5wtFriAAAAwLXBbeF3r169lJ6erj179igjI8MqZL6apaSkKCgoSE2aNFFCQoJVZF0VwzCqTML69eun4OBgpaen69lnnz3nuk6nU61atZKfn995x5qTk6Pu3bu7JDpeXl7q3r27cnJyJP1ak1F+tGrIkCHnPT8AAABwodwmGYMHD1a7du00aNAgDRkyRPfcc89V/0dqhw4dVLduXRUUFCg+Pl6GYVRapC1JQ4cO1aBBg5ScnFzlfGfOnNGkSZP04IMPKjAwsMpxxcXFWrRokV566SVdd911kqSgoCANHz68ynv27dsn0zQVHx9vtcXHx2vXrl3at2+fy9iCggI99dRTmj59epXzAQAAAJfKbZJx6NAh/fTTTwoICFDjxo2tz9WmvEbCNE2tXLlSMTExVf4uSFxcnPUK2/vvv1+RkZGVvlnqt7777jslJydr/PjxFdYzTVOJiYmSziYIR48eVU5OjjIzM/X+++/rp59+OufcY8eOVfv27eV0OuV0OtW+fXuNHTu20rFr165V/fr11adPH3dfCQAAAHBRvCSVnWvArFmzNGbMGO3bt09lZWeHlpWVqX///jURH2pB7g9HFbfl/Uue54PhD3kgGgAAAFyOHA6HIiIiKu1zW/gdHR2tNm3a6PTp0x4PDAAAAMDVx22SkZWVpRtuuEFHj/LGoWvF8X0H2IUAAADARXObZCQmJso0TWVlZbn84ndUVFS1BgYAAADgyuQ2yVi6dKnmzJmjzMzMKguhAQAAAKCc2yTjxIkT+sc//lETsQAAAAC4CrhNMrZv367Zs2dr3bp1LselzvXjdLiyBdx8s+5+Z+Ulz/PBiJEeiAYAAABXGrdJRvmP0PXq1ctq4xW2AAAAAKriNsmIjIysiTgAAAAAXCXc/uK3v7+/5s+fL4fDIYfDoXnz5snf378mYgMAAABwBXKbZCxevFhFRUWKjo5WdHS0fvrpJy1ZsqQmYrsmFBUVVdo+c+ZMHT58WKZpKjMzU0OGDLHaJ0+eXGF8VFSUMjIylJOTo927d1uvGH7llVdkmqays7N14sQJmaYp0zQ1bNiw6nsoAAAAXNPcHpdq06aNhg8fbl3PmjWLou8akpSUpPnz56tjx47avn27brzxxkrHde3aVfPmzdPAgQN14MABtW7dWps2bdL+/fv12GOPSZKCg4P1/vvvWzU2AAAAQHVxu5NRXFys22+/3bq+7bbbVFxcXK1BwdWePXtUUlKioKCgSvunTJmi2bNn68CBA5KkAwcOKDExUVOnTq3BKAEAAICz3O5k/Nd//ZeWLl2qgIAASVJhYaHGjBlT3XHhN2655RaVlpbq6NGjlfaHhoZq3rx5Lm2pqakaP378ea8RGxurcePGSZL8vb0vPlgAAABc89wmGRkZGerevbv8/PwkVV1DAM+Li4vT/fffr6KiIo0cWb2/OWG322W32yVJuQUF1boWAAAArm5uj0s9//zzCggIUFFRkYqKinTDDTfoueeeq4nYrikJCQlWUXa5pKQkGYahP/zhD9qxY0eV9+bk5Cg8PNylLTw8XNnZ2dUWLwAAAFAVt0nGXXfdpePHj1vXP/74o/7jP/6jWoO6FsXHx8swjIsqzJ43b56mT5+u4OBgSWeLvGfMmKH58+d7OkwAAADA
LbfHperWravrr79ep06dkiT5+PjImzP7tSo+Pl6TJk2yrlu2bKknn3xS69ev13XXXafTp09r2rRpysjIqMUoAQAAcK3yklR2rgHTpk3TkCFDrN/GeOihh7Ru3TrNnTu3JuJDLcgtKFDclv+95Hk+GFG9dSQAAACoPQ6HQxEREZX2ud3JePHFF5WRkaEBAwZIkp577jl99NFHno0QAAAAwFXD7U4Grj3nykoBAAAA6dx/M7ot/AYAAACAC0GSAQAAAMCj3CYZjz/++Hm1AQAAAIB0HjUZaWlpFX7obdeuXerRo0d1xoVatLfgR03e8uklzbFuxN0eigYAAACXo4t6u9SoUaM0evRohYSE6L333rPa/f399cMPP3g+SgAAAABXhSqTjM8++0xHjhxRUFCQyy9HFxUVaffu3TUSHAAAAIArT5VJxqFDh3To0CENGDBAxcXFKisrU7t27dSxY0dlZmbWZIwAAAAAriBuC78/+eQT+fj46P/9v/+njz76SA888IDefPPNGggNAAAAwJXIbZLh5eWl4uJiDR06VK+99pqio6MVGhp6wQsVFRVV2j5z5kwdPnxYpmkqOztbo0aNcumPiopSWVmZOnToIElKSUmRaZo6ePCg8vPzZZqmTNNUcHCw8vLy1LhxY0lSWVmZ5s2bZ80zefJkzZw507q+7777lJGRoaysLKWnp8tutysgIECSdPfdd2vXrl1KT09Xdna2xo0b5/b5TNNUcnKyJMnX11fHjh2Tn5+fy5g1a9YoOjpaMTExLrGbpqlOnTopODhYJ06csL6LpUuXql69Xzeb6tatq/z8fCUmJrrM26BBA7322mtyOp1KS0tTamqqHnnkEUlymbP888ADD7h9HgAAAOBinVeS0atXL91333364IMPJJ39Y9eTkpKSZBiGoqKi9Prrr7v8YW2z2bR9+3bZbDZJUq9evWQYhv72t79p5cqVMgxDhmHo4MGDLnOePHlSQ4cOtZKO3/rjH/+ouLg43XXXXQoLC1OPHj302WefqWnTpqpXr57eeOMNDRkyRN27d5dhGNq2bds54+/YsaPq1q2rvn37qn79+iouLtbGjRt17733WmP8/f3Vp08frV+/XpJcYjcMQ19++aUkad++fTIMQ126dFGLFi0UHR1tzTFw4EDl5uZqxIgRLusvXLhQhYWFateuncLDwzV48GA1atTI6i+fs/yzfPnycz4PAAAAcCncJhmTJk3S9OnTtWbNGuXk5CgkJERbt26tlmCcTqdOnDihwMBASWf/hb5Pnz4aO3ZshR0Od0pKSvTGG28oLi6uQt/TTz+tKVOm6Ntvv5UklZaWasmSJcrNzZWfn5/q1aungoICSdKpU6eUm5t7zrVsNpuWL1+ujz76SFFRUZKk5ORkl5jvvfdebdy4UcXFxecVf2lpqb744gs1b97cZZ2XXnpJhw4dUu/evSVJN998s2655RbFx8errOzs24iPHTumF1988bzWKRcbGyuHwyGHwyF/7+su6F4AAADgt86rJiMqKkqvvvqqGjRooLy8PE2cOLFagjEMQ3v37tXRo0clnT0q9eGHH2rv3r0qKCi44N/mePXVV3XffffJ39/fpT00NFS7du2q9J7CwkKtW7dOBw8e1Ntvv63Ro0fLy8vrnOuMHDlSK1asUHJysrXjsnHjRvXo0cPaURg1apR1nKr8nt8eYfLx8XGZ09vbW7feeqs+/PBD63rAgAFav369yzqhoaHKyMiwEozKtGnTxmWtPn36VBhjt9sVERGhiIgI/fTL6XM+LwAAAHAubpOMsLAw7dq1S9nZ2crJyVFqaqo6d+7s0SDi4uKUlZWlzz//XM8//7zVbrPZtGLFCknSihUrrD+sz1dRUZGWLVt2zl8oDwsLk2macjqd1tGk2NhY9e/fX1988YWmTJmixYsXV3l/eHi4jh07pq+//lqbN2+WYRgKDAzU6dOntW7dOg0fPlyNGzeWYRjauHGjdd/vj0udPHlS0q8J
wffff68jR45Yb/K65557tHXrVp08eVKrV6/Wn/70J9WpU/E/34wZM2Sapr755hur7ffHpXbs2HFB3yMAAABwIdwmGa+//rqeeOIJtW7dWsHBwZo8ebLsdvtFL5iQkGD9i3q5pKQkhYWFadiwYVq0aJG8vb0VGBioyMhILVy4UHl5eZo6dapLfcL5WrBggcaOHasGDRpYbdnZ2dauSFZWlgzD0IYNG+Tr62uNycrK0oIFCzRw4EANGzasyvltNps6duyovLw87du3T/7+/tb48iNTw4cP13vvvaeSkhK38ZYnBG3atFF4eLiGDBlirTNgwADl5eUpLS1NjRs3VmRkpHJyctStWzdrt2X27NkyDKPC7g0AAABQU9wmGQ0aNHApfP74449d/mC/UPHx8da/qP/e+vXrlZqaqpiYGA0fPlzLly9X69atFRISolatWikvL099+/a9oPUKCwu1atUqjR071mpLTEzUvHnzXOodyhOMBg0a6I477rDau3fvXqGovJyXl5eio6PVpUsXhYSEKCQkRFFRUdaOy7Zt29SuXTuNHz/e5ajU+SgoKNBTTz2l6dOny8/PT3379lWrVq2sdcaPHy+bzaZ9+/YpNTVVCQkJ1s6Gt7e32yNeAAAAQHVxm2Ts379f8fHxCg4OVnBwsJ5++mnt37+/2gKaNWuWnnjiCdlsNq1Zs8alb/Xq1Rd8ZEqS5s+fr6CgIOt6w4YNevnll7VhwwZlZ2fr008/1ZkzZ7Rx40Z5eXlp2rRp2rNnj0zT1LPPPqsxY8ZUOm/fvn31zTff6MiRI1bbJ598os6dO+umm25SWVmZ3n33XTVu3Fgff/yxy72/r8koL+T+rbVr16p+/fqKi4vTli1bdOrUKavvvffe05AhQ3T99dfrkUceUePGjeV0OuVwOLRp0yZNmzbNGvv7mowJEyZc8HcIAAAAnC8vSVVXDEu64YYb9Oyzz1rFwtu3b9czzzyjH3/8sSbiQy3YW/CjJm/59JLmWDfibg9FAwAAgMuRw+FQREREpX1ukwxce871PwwAAAAgnftvxnqVturscZxzKf89iGvFjBkzKvwI3jvvvKPZs2fXUkQAAADA5anKnYz8/Hx9/fXXSk5O1ueff16hkPiTTz6pifhQC9jJAAAAgDsXtZNx0003aeDAgbLZbBo9erT+f3v3HhZVtf8P/D3cvXERFQl0IC8cg6TRgylqppbXU5poQFZYRtrRUkPUzuGEFgVGhaWeUlTsoqIexFJTo5QvpoIDs7mjcplUTAHRPOQNxfX7g5/7OM4MeBkcjPfreT7Pw15r7bXWnkUTH9feMzt27MCGDRtQWFjYZBMlIiIiIqIHn9Ek4/r169i9ezd2794NGxsbBAcHIzU1FYsWLcLy5cvv5xzpPnPs5oXx/9nXYJutE+/so4SJiIiIqOUwmmQAgI2NDcaOHYvg4GB4eHjg888/1/tYWSIiIiIiopsZTTK++uor+Pj44IcffsCiRYtQUFBwP+dFREREREQPKKMPftfV1eHChQsAACH+10ShUEAIAQcHh/syQbr/is/+F+F7chpsw9uliIiIiFq2hh78NvqN35aWlrC3t4e9vT0cHBzkuHFMhtXU1Bgsj4yMRHl5OSRJQkFBAYKCgnTqx40bByEEvLy8AADp6emQJAnHjh1DZWWl/G3dSqUSWq0Wzs7OAOoTwI8//ljuJywsDJGRkfLx5MmTkZOTg/z8fGRnZyM+Pp7rR0RERERNymiSQaYXFxcHlUqFcePGYcWKFbCy+t/dasHBwdi3bx+Cg4MBAP3794dKpcK7776LjRs3QqVSQaVS4dixYzp9Xr58GRMmTJCTjpuNHDkSc+bMwejRo+Hj44M+ffrgwIEDcHFxadoLJSIiIqIWjUmGGZSUlODixYtwcnICALRp0waDBg3C1KlT9XY4GnPt2jWsXLkSc+bM0av75z//iblz5+K3334DUP+JYQkJCTh69Oi9XwQRERERkRFMMsxApVKh
uLgYVVVVAOpvldq1axeKi4tRXV2NPn363FF/y5cvx+TJk2Fvb69T7u3tDY1Gc1t9hIaGQq1W/2uaVAAAIABJREFUQ61Ww97G+o7GJyIiIiK6GZOM+2jOnDnIz89HRkYGPvjgA7k8ODgYiYmJAIDExET5lqnbVVNTg6+//hpvvfWW0TY+Pj6QJAklJSV4/vnn9erj4+Ph5+cHPz8//Lf26h2NT0RERER0MyYZTSQqKkp+WPuGuLg4+Pj4ICAgAKtXr4atrS2cnJwwbNgwrFq1ClqtFuHh4QaTgMYsWbIEU6dORZs2beSygoICeVckPz8fKpUKO3fuRKtWre79AomIiIiIjGCS0UQiIiLkh7VvtW3bNmRmZiIkJAQTJ07EN998Aw8PD3h6eqJr167QarUYPPjOPiL23Llz2LRpE6ZOnSqXRUdH4+OPP4abm5tcxgSDiIiIiJpag9/4TU3nvffew/r16/Hbb79h8eLFOnVJSUnyp03diU8++QQzZ86Uj3fu3ImOHTti586dsLS0xO+//478/Hzs3r3bJNdARERERGSI0S/jo5aLX8ZHRERERI25qy/jIyIiIiIiuhu8XYr0/F56hDsVRERERHTXuJNBREREREQmxSSDiIiIiIhMikkGERERERGZFJ/JID1O3bwx6T8FRus3T/S+j7MhIiIiogcNdzKIiIiIiMikmGQQEREREZFJMckgIiIiIiKTYpLRRGpqaozWde/eHdu2bUNJSQkyMzOxZ88eDB5c/70UISEhqKyshCRJcvTq1QtKpRJCCMycOVPuZ+nSpQgJCQEAJCQkoKysDJIkITs7G8OGDZPb7d27F4cPH5b727x5cxNdNRERERERH/y+72xtbbFjxw7MnTsX27ZtAwB4e3vjr3/9K/bt2wcA2LhxI958802d85RKJSoqKjBr1iysWLECV69e1es7PDwcSUlJePLJJ7Fy5Ur07NlTrps8eTKysrKa8MqIiIiIiOpxJ+M+mzx5Mg4ePCgnGABQUFCAr776qtFzq6qq8PPPP8u7F8YcPHgQbm5u9zxXIiIiIqK7wZ2M+8zb2xsajabBNoGBgRg0aJB8PGDAAPnnxYsXY+fOnVizZo3R80eNGoWtW7fqlK1btw6XLl0CAKSkpGDevHk69aGhoXj99dcBAPY2lrd3MUREREREBjDJMLMtW7agR48eOHr0KAICAgAYvl3qBq1Wi4yMDLzwwgt6dbGxsfjwww/h7u6uk5gAjd8uFR8fj/j4eABAydmLd3s5RERERES8XaqpRUVFyQ9cA/W3RvXp00eunzBhAqZMmYL27dvfdp8ffvgh5s+fD4VCoVMeHh4OLy8vzJ8/v8GdDiIiIiKipsQko4lFRERApVJBpVIBANavX4+BAwfimWeekdu0bt36jvo8cuQICgsLdfq42bJly2BhYYERI0bc/cSJiIiIiO4Sb5e6zy5fvoy//e1v+PTTT7FkyRJUVFSgpqYGUVFRcptbn8n4+9//jt9++02nnw8++EDeHTEkKioK8+bNw48//ghA95mMM2fO4OmnnzblZRERERERyRQAhLknQc1LydmLeGeP1mj95one93E2RERERNQcqdVq+Pn5Gazj7VJERERERGRSvF2K9JwrLcDmiYazUiIiIiKixnAng4iIiIiITIpJBhERERERmRSTDCIiIiIiMik+k0F6HurWG5FbTuqULZrgZqbZEBEREdGDhjsZRERERERkUkwyiIiIiIjIpJhkEBERERGRSTHJuM9qamoMlvfs2RN79+6FJEkoLCzEihUr5LqBAwciIyMDRUVFKCoqQmhoqFwXGRmJCxcuoGPHjgbH6NSpE9atW4fS0lJkZmbiwIEDGD9+fBNcGRERERFRPSYZzcTnn3+OuLg4qFQqPPLII1i6dCkAwMXFBevXr8f06dPRq1cvDBo0CNOmTcOYMWPkc8+cOYOwsDCD/W7duhVpaWno1q0b/vrXvyIoKAju7u735ZqIiIiIqGVi
ktFMuLq6ory8XD7Oz88HAMyYMQNr166FJEkAgOrqasybNw8LFiyQ265ZswaBgYFwcnLS6XPYsGGora3V2RU5fvw4li1b1pSXQkREREQtHJOMZiIuLg579uzBDz/8gNmzZ8PBwQEA4O3tjaysLJ22mZmZ8Pb2lo//+OMPrFmzBrNmzdJp5+3tDY1Gc1vjh4aGQq1WQ61Wo7Utfy2IiIiI6O7xr8lmYu3atejVqxc2b96MJ598Eunp6bCxsbnt8z///HOEhISgbdu2RtssW7YM2dnZOHTokF5dfHw8/Pz84Ofnh4tXrt/VNRARERERAUwyzCYqKgqSJMm3QQHAqVOnkJCQgPHjx+PatWvw8fFBYWEh+vbtq3Nu3759UVBQoFN2/vx5rF+/HjNmzJDLCgoK0KdPH/l45syZGD58uM5D4kREREREpsYkw0wiIiKgUqmgUqkAACNHjoSVVf0XsLu4uMDZ2RknT57E8uXLMWXKFPj6+gIA2rdvj8WLF+Ojjz7S6/PTTz/FtGnT5H727NkDOzs7TJ8+XW7TunXrpr40IiIiImrhrMw9Aao3YsQIfPbZZ7h8+TIAIDw8HBUVFQCAF198EfHx8WjXrh0UCgWWLFmC7du36/VRXV2N5ORkvP3223LZ+PHjERcXh3nz5qGqqgoXLlzA/Pnz789FEREREVGLpAAgzD0Jal5Onr2ClalndMoWTXAz02yIiIiIqDlSq9Xw8/MzWMfbpYiIiIiIyKR4uxTp+a00F4smGM5KiYiIiIgaw50MIiIiIiIyKSYZRERERERkUkwyiIiIiIjIpPhMBunx7OaLb/9TKR+/OLGTGWdDRERERA8a7mQQEREREZFJMckgIiIiIiKTYpJBREREREQm9adPMq5duwZJkpCdnY2srCwMGDDAaNuXXnoJeXl5yM3NhUajQVhYGAAgISEBAQEBOm1ramp0jmfNmoVLly7B3t5eLhsyZAh+//13SJKEoqIixMbG6pzj7OyM2tpaTJs2TafcxcUFGzZsQElJCTIzM7Fjxw707t0bkiRBkiRUV1ejrKwMkiQhJSUFSqUSFy9elOslScJLL70EANBqtcjNzUVOTg5SU1PRtWvXO38RiYiIiIjukPgzR01NjfzziBEjRGpqqsF2o0aNEllZWcLV1VUAEDY2NuK1114TAERCQoIICAgw2i8AkZ6eLtLS0sSUKVPksiFDhoht27YJAMLOzk4UFRUJf39/uX769OkiLS1Nb04HDhwQ06ZNk4979+4tBg0aJB/fOh+lUiny8vIMXpdWqxXOzs4CgFi4cKFYuXJlo6/ZmbO14tv/VMph7jVkMBgMBoPBYDS/UKvVRuv+9DsZN7O3t8e5c+cM1r3zzjuYO3cuTp06BQCora3FqlWrbqvfhx9+GG3btkVERASCg4MNtrl8+TKys7Ph5uYmlwUHByMsLAxubm5y+dChQ3H16lWsWLFCbpebm4tffvnltubSkIMHD+qMT0RERETUFP70H2HbqlUrSJIEOzs7uLq6YtiwYQbb+fj4ICsry2g/sbGxiIiIMFgXFBSExMRE7Nu3D15eXujUqRMqKyt12jg6OqJHjx5IS0sDALi7u8PV1RVqtRqbNm1CYGAgPv3000bnYUy3bt0gSZJ8/Oabb+olJqNGjcLWrVsNnh8aGorXX38dAGBr06JyTyIiIiIysT/9X5OXLl2CSqVCr169MGrUKHz99dd31U94eDhUKpUcNwsODkZiYiKEEEhKSsKkSZPkusGDByM7OxsnT57E7t27UVFRAQAIDAzEpk2bAACJiYlGd0BuV2lpqc78bk4w9u7di/LycowePRobNmwweH58fDz8/Pzg5+eHK7XX72kuRERERNSy/emTjJulp6ejQ4cO6NixI6KiouSHpAGgoKAAffv2veM+fXx80KNHD6SkpECr1SIoKEgnYdi3bx8ee+wxeHt7Y+rUqfD19QVQn5hMmTIFWq0W33//PXr37o3u3bvf9TwaMnTo
UCiVSmRnZ2PRokUm7ZuIiIiI6FYtKsnw8vKCpaUlqqurERERobMrER0djdjYWLi4uAAArK2tMXXq1Eb7DA4OxsKFC+Hp6QlPT0+4ubnhoYce0vsUp19//RUxMTGYP38+evTogbZt28Ld3V0+Lzo6GsHBwdizZw9sbW0RGhoqn/voo49i0KBB93TtdXV1mD17Nl5++WU4OTndU19ERERERA350ycZN57JkCQJGzduREhICK5f178daOfOnVi2bBl++ukn5OfnQ6PR6HwcrTFBQUFITk7WKUtOTkZQUJBe2y+//BJPPPEEgoOD9c5JSkqSd0Cee+45PPXUUygpKUF+fj6io6Nx+vTpBudx45mMG/Hmm2/qtTl9+jQ2bNiAGTNmNHpdRERERER3S4H6j5kikp05W4tde36Xj1+c2MmMsyEiIiKi5kitVsPPz89g3Z9+J4OIiIiIiO6vP/1H2NKd05bm4MWJhrNSIiIiIqLGcCeDiIiIiIhMikkGERERERGZFJMMIiIiIiIyKT6TQXp6evoiZV2lfPz0ZH66FBERERHdPu5kEBERERGRSTHJICIiIiIik2KSQUREREREJtXikoyamhqjdS+99BLy8vKQm5sLjUaDsLAwAEBCQgICAgIa7GfWrFm4dOkS7O3t5bIhQ4bg999/hyRJKCoqQmxsrM45zs7OqK2txbRp03TKXVxcsGHDBpSUlCAzMxM7duxA7969IUkSJElCdXU1ysrKIEkSUlJSoFQqcfHiRblekiS89NJLAACtVovc3Fzk5OQgNTUVXbt2vfMXjYiIiIjoDomWFDU1NQbLR40aJbKysoSrq6sAIGxsbMRrr70mAIiEhAQREBDQYD/p6ekiLS1NTJkyRS4bMmSI2LZtmwAg7OzsRFFRkfD395frp0+fLtLS0kRqaqpOXwcOHBDTpk2Tj3v37i0GDRokH986H6VSKfLy8gxel1arFc7OzgKAWLhwoVi5cmWjr9H5M7UiZV2lHOZeMwaDwWAwGAxG8wu1Wm20rsXtZBjzzjvvYO7cuTh16hQAoLa2FqtWrbqtcx9++GG0bdsWERERCA4ONtjm8uXLyM7Ohpubm1wWHByMsLAwuLm5yeVDhw7F1atXsWLFCrldbm4ufvnll7u9NNnBgwd1xiciIiIiagpMMv4/Hx8fZGVlGa2PjY3VuR3pZkFBQUhMTMS+ffvg5eWFTp30P/LV0dERPXr0QFpaGgDA3d0drq6uUKvV2LRpEwIDA29rHsZ069ZNZ36DBg3SazNq1Chs3brV4PmhoaFQq9VQq9WwtuWvBRERERHdPf41eZvCw8OhUqnkuFlwcDASExMhhEBSUhImTZok1w0ePBjZ2dk4efIkdu/ejYqKCgBAYGAgNm3aBABITEw0ugNyu0pLS3Xmd/POx969e1FeXo7Ro0djw4YNBs+Pj4+Hn58f/Pz8cPXK9XuaCxERERG1bC02yYiKitLZlSgoKEDfvn3vuB8fHx/06NEDKSkp0Gq1CAoK0kkY9u3bh8ceewze3t6YOnUqfH19AdQnJlOmTIFWq8X333+P3r17o3v37nc9j4YMHToUSqUS2dnZWLRokUn7JiIiIiK6VYtNMiIiInR2JaKjoxEbGwsXFxcAgLW1NaZOndpoP8HBwVi4cCE8PT3h6ekJNzc3PPTQQ3qf4vTrr78iJiYG8+fPR48ePdC2bVu4u7vL50VHRyM4OBh79uyBra0tQkND5XMfffRRg7c/3Ym6ujrMnj0bL7/8MpycnO6pLyIiIiKihrTYJONWO3fuxLJly/DTTz8hPz8fGo1G5+NojQkKCkJycrJOWXJyMoKCgvTafvnll3jiiScQHBysd05SUpK8A/Lcc8/hqaeeQklJCfLz8xEdHY3Tp083OI9bn8l488039dqcPn0aGzZswIwZMxq9LiIiIiKiu6VA/cdMEcnOn6nFod2/y8dPT9Z/kJ2IiIiIWja1Wg0/Pz+DddzJICIiIiIik7Iy9wSo
+TmqzcHTkw1npUREREREjeFOBhERERERmRSTDCIiIiIiMikmGUREREREZFJMMkjPI0pfaOIroImvMPdUiIiIiOgBxCSDiIiIiIhMikkGERERERGZFJMMIiIiIiIyKSYZTezatWuQJAnZ2dnIysrCgAEDDLbr2bMn9u7dC0mSUFhYiBUrVsh1AwcOREZGBoqKilBUVITQ0FC5LjIyEhcuXEDHjh3lspqaGvnnTp06Yd26dSgtLUVmZiYOHDiA8ePHN8GVEhERERH9j2A0XdTU1Mg/jxgxQqSmphpst2vXLvHss8/Kxz4+PgKAcHFxEceOHRMqlUoAEM7OziIzM1OMGTNGABCRkZHi2LFjIiYmxuCYBw4cENOmTZOPu3btKmbOnNngnC9U1gpNfIXQxFeY/fVjMBgMBoPBYDTPUKvVRuu4k3Ef2dvb49y5cwbrXF1dUV5eLh/n5+cDAGbMmIG1a9dCkiQAQHV1NebNm4cFCxbIbdesWYPAwEA4OTnp9Dls2DDU1tbq7IocP34cy5YtM9k1ERERERHdiklGE2vVqhUkSUJRURFWrVqF999/32C7uLg47NmzBz/88ANmz54NBwcHAIC3tzeysrJ02mZmZsLb21s+/uOPP7BmzRrMmjVLp523tzc0Gs1tzTM0NBRqtRpqtRqWdvy1ICIiIqK7x78mm9ilS5egUqnQq1cvjBo1Cl9//bXBdmvXrkWvXr2wefNmPPnkk0hPT4eNjc1tj/P5558jJCQEbdu2Ndpm2bJlyM7OxqFDh/Tq4uPj4efnBz8/P9Rdvn7b4xIRERER3YpJxn2Unp6ODh06oGPHjoiKioIkSfJtUABw6tQpJCQkYPz48bh27Rp8fHxQWFiIvn376vTTt29fFBQU6JSdP38e69evx4wZM+SygoIC9OnTRz6eOXMmhg8frvOQOBERERGRqTHJuI+8vLxgaWmJ6upqREREQKVSQaVSAQBGjhwJKysrAICLiwucnZ1x8uRJLF++HFOmTIGvry8AoH379li8eDE++ugjvf4//fRTTJs2Te5nz549sLOzw/Tp0+U2rVu3burLJCIiIqIWzsrcE/izu/FMBgAoFAqEhITg+nX925FGjBiBzz77DJcvXwYAhIeHo6KiAgDw4osvIj4+Hu3atYNCocCSJUuwfft2vT6qq6uRnJyMt99+Wy4bP3484uLiMG/ePFRVVeHChQuYP39+U1wqEREREREAQIH6j5kikl2orMWR7+o/BatPqIuZZ0NEREREzZFarYafn5/BOt4uRUREREREJsXbpUhP4bEc+IUazkqJiIiIiBrDnQwiIiIiIjIpJhlERERERGRSTDKIiIiIiMikmGSQnt7uvjj+8Skc//iUuadCRERERA8gJhlERERERGRSTDKIiIiIiMikmGQQEREREZFJMcm4S9euXYMkScjOzkZWVhYGDBhgsF1kZCTKy8shSRIKCgoQFBQk1yUkJKCsrAySJEGSJOzfvx9KpRInTpyAQqHQ6UeSJPTr1w+RkZEICwszej4AVFVVwdHREQDQuXNnCCEwcOBAua/Kykq0b9/epK8HEREREdENTDLu0qVLl6BSqfDYY4/hnXfeQXR0tNG2cXFxUKlUGDduHFasWAErq/99B2J4eDhUKhVUKhUGDhyIY8eO4fjx4xg8eLDcxsvLC+3atcOhQ4f0+r71fABIT0+Xkx5/f39oNBr4+/sDAHr27Inq6mqcPXvWJK8DEREREdGtmGSYgL29Pc6dO9dou5KSEly8eBFOTk4NttuwYYPOjkdQUBASExNvez4HDhyQkwp/f3/ExcXpJB03djyIiIiIiJoCk4y71KpVK0iShKKiIqxatQrvv/9+o+eoVCoUFxejqqpKLouNjZVvd/r2228BAJs2bcL48eNhaWkJAAgMDMSGDRsM9mno/P3798tJRr9+/ZCcnIwuXboAqE8yDhw4oNdPaGgo1Go11Go1LFop9OqJiIiIiG6XVeNNyJAbt0sB
QP/+/fH111/Dx8fHYNs5c+bglVdeQc+ePfHMM8/o1IWHhyMpKUmnrLKyEvn5+Rg+fDgqKipw7do1FBQUGOzb0PlqtRoqlQqtW7eGtbU1Lly4gLKyMnTr1g3+/v745JNP9PqJj49HfHw8AODKqdrbexGIiIiIiAzgToYJpKeno0OHDujYsSOioqLknYUb4uLi4OPjg4CAAKxevRq2traN9nnjlqmgoCCjuxjGXLp0CcXFxXj11Veh0WjkOY4ZMwadOnXCkSNH7uwCiYiIiIjuAJMME/Dy8oKlpSWqq6sREREhP4h9q23btiEzMxMhISGN9rllyxaMGTMGgYGBd/Q8xg0HDhzA7NmzcfDgQQDAwYMHMWvWLKSnp99xX0REREREd4JJxl268UyGJEnYuHEjQkJCcP369UbPe++99/D222/LH1F78zMVkiTB2toaAHD+/HkcPHgQFRUV0Gq1Rvszdv7+/fvRrVs3OcnQaDRwd3c3+DwGEREREZEpKQAIc0+Cmpcrp2pRsa4aANB1rquZZ0NEREREzZFarYafn5/BOu5kEBERERGRSfHTpUhPbnkO/OYazkqJiIiIiBrDnQwiIiIiIjIpJhlERERERGRSTDKIiIiIiMikmGSQHl93b5z6xPA3jBMRERERNYZJBhERERERmRSTDCIiIiIiMikmGUREREREZFJMMkykpqbGYHlkZCTKy8shSRIKCgoQFBQk1yUkJKCsrAySJEGSJOzfvx9KpRInTpyAQqHQ6UeSJPTr1w+RkZEICwszej4AVFVVwdHREQDQuXNnCCEwcOBAua/Kykq0b9/epNdPRERERHQDk4z7IC4uDiqVCuPGjcOKFStgZfW/70AMDw+HSqWCSqXCwIEDcezYMRw/fhyDBw+W23h5eaFdu3Y4dOiQXt+3ng8A6enpGDBgAADA398fGo0G/v7+AICePXuiuroaZ8+ebcpLJiIiIqIWjEnGfVRSUoKLFy/CycmpwXYbNmzQ2fEICgpCYmLibY9z4MABOanw9/dHXFycTtJxY8eDiIiIiKgpMMm4j1QqFYqLi1FVVSWXxcbGyrc7ffvttwCATZs2Yfz48bC0tAQABAYGYsOGDQb7NHT+/v375SSjX79+SE5ORpcuXQDUJxkHDhzQ6yc0NBRqtRpqtRqK1pamu2giIiIianGsGm9C92rOnDl45ZVX0LNnTzzzzDM6deHh4UhKStIpq6ysRH5+PoYPH46Kigpcu3YNBQWGv7fC0PlqtRoqlQqtW7eGtbU1Lly4gLKyMnTr1g3+/v745JNP9PqJj49HfHw8AKD21MV7uVwiIiIiauG4k2FiUVFR8s7CDXFxcfDx8UFAQABWr14NW1vbRvu5cctUUFCQ0V0MYy5duoTi4mK8+uqr0Gg0AOqf0xgzZgw6deqEI0eO3NlFERERERHdASYZJhYRESE/iH2rbdu2ITMzEyEhIY32s2XLFowZMwaBgYF39DzGDQcOHMDs2bNx8OBBAMDBgwcxa9YspKen33FfRERERER3gknGffbee+/h7bfflj+i9uZnKiRJgrW1NQDg/PnzOHjwICoqKqDVao32Z+z8/fv3o1u3bnKSodFo4O7ubvB5DCIiIiIiU1IAEOaeBDUvtacuonq9Fq5h3uaeChERERE1U2q1Gn5+fgbruJNBREREREQmxSSD9OSUF3AXg4iIiIjuGpMMIiIiIiIyKSYZRERERERkUkwyiIiIiIjIpJhkkB7fLn8x9xSIiIiI6AHGJIOIiIiIiEyKSQYREREREZkUkwwiIiIiIjIpJhl34dq1a5AkCdnZ2cjKysKAAQN06qdMmQJJkiBJEq5cuYLc3FxIkoTo6GiEhISgsrISkiShqKgIs2fP1jnX19cXQgiMHDlSp7ympgYAoFQqkZeXpzenhIQElJWVyePu378fABodj4iIiIioKQjGnUVNTY3884gRI0RqaqrRtlqtVjg7O8vHISEhYunSpQKAaN++vaiqqhLu
7u5yfUxMjEhLSxNr1641OKZSqRR5eXl64yQkJIiAgAC98sbGMxS1p2sarGcwGAwGg8FgMNRqtdE67mTcI3t7e5w7d+6uzj179ixKSkrg6uoql02aNAlTpkzB008/DVtbW1NN0+h4RERERESmZmXuCTyIWrVqBUmSYGdnB1dXVwwbNuyu+unSpQvs7OyQm5sLAPD394dWq0VZWRlSU1MxduxYbNmy5bb7i42NRUREBACgoKAAL774YoPj3Sw0NBSvv/46AMCiFX8tiIiIiOjemH2r5UGLm2+X6t+/v8jPzzfa1tDtUpWVlSInJ0dcuXJFhIaGynVLly4Vr732mgAgnnnmGbF582a9Me/mdilj4xkL3i7FYDAYDAaDwWgseLtUE0pPT0eHDh3QsWNHREVFyQ9eN2Tjxo3w9fWFv78/YmJi4OLiAgsLCwQEBODdd9+FVqvF0qVLMWrUKLRt2/ae52hoPCIiIiKipsIk4x55eXnB0tIS1dXViIiIgEqlgkqluq1zs7Ky8M0332DWrFkYPnw4cnNz0bVrV3h6esLDwwNJSUl47rnnTDbXm8cjIiIiImoqvPn+Ltx4JgMAFAoFQkJCcP369bvqa/HixdBoNOjcuTOSk5N16pKSkvDGG2/gm2++0Sn38vLCiRMn5OM5c+YA0H0mAwD69etndLwPP/wQf/zxx13NmYiIiIioIQrU3zdFJKs9XQObzu3MPQ0iIiIiasbUajX8/PwM1vF2KSIiIiIiMikmGaQn58Rhc0+BiIiIiB5gTDKIiIiIiMikmGQQEREREZFJMckgIiIiIiKTYpJBREREREQmxSSDiIiIiIhMikkGERERERGZFJMMIiIiIiIyKSYZJlBTU6NzPGXKFEiSBEmScOXKFeTm5kKSJERHRyMkJASVlZWQJAlFRUWYPXu2zrm+vr4QQmDkyJEGx1AqlcjLy9ObQ0JCAsrKyuRx9+/fDwCNjkdERERE1BQE496ipqbGaJ1WqxXOzs7ycUhIiFg7c7PGAAAT4UlEQVS6dKkAINq3by+qqqqEu7u7XB8TEyPS0tLE2rVrDY6hVCpFXl6e3jgJCQkiICBAr7yx8QyFWq02+2vKYDAYDAaDwWje0dDfjNzJMKOzZ8+ipKQErq6uctmkSZMwZcoUPP3007C1tW3y8YiIiIiITI1Jhhl16dIFdnZ2yM3NBQD4+/tDq9WirKwMqampGDt27B31FxsbK98u9e233zY63s1CQ0OhVquhVqvRoUOHu7sgIiIiIiIwyTCLwMBA5OTkoKSkBP/+979x5coVAEBwcDASExMBAImJiQgODr6jfsPDw6FSqaBSqfDiiy82Ot7N4uPj4efnBz8/P5w5c+Yero6IiIiIWjomGSYUFRUl7yQ0ZOPGjfD19YW/vz9iYmLg4uICCwsLBAQE4N1334VWq8XSpUsxatQotG3b9p7nZWg8IiIiIqKmwiTDhCIiIuSdhNuRlZWFb775BrNmzcLw4cORm5uLrl27wtPTEx4eHkhKSsJzzz1nsvndPB4RERERUVNhkmFmixcvxiuvvILg4GAkJyfr1CUlJRm8ZcrLywsnTpyQY+LEiQB0n8mQJAnW1tZGxzPFDgkRERERkSEK1H/MFJFMrVbDz8/P3NMgIiIiomasob8ZuZNBREREREQmxSSDiIiIiIhMikkGERERERGZFJ/JID3//e9/ceTIEXNPg27RoUMHfodJM8R1aZ64Ls0T16V54ro0Tw/CuiiVSnTq1MlovWAwbg61Wm32OTC4Lg9KcF2aZ3BdmmdwXZpncF2aZzzo68LbpYiIiIiIyKSYZBARERERkUlZAlho7klQ86PRaMw9BTKA69I8cV2aJ65L88R1aZ64Ls3Tg7wufPCbiIiIiIhMirdLERERERGRSTHJICIiIiIik2KSQTpGjhyJw4cPo7i4GPPnzzf3dP6UVq9ejYqKCuTl5cllTk5O+PHHH3H06FH8+OOPcHR0lOs+++wzFBcXIycn
ByqVSi5/+eWXcfToURw9ehQvv/yyXN6nTx/k5uaiuLgYn3322f25qAecu7s79uzZg4KCAuTn5+Ott94CwHUxN1tbW2RkZCA7Oxv5+flYuHAhAMDDwwPp6ekoLi5GYmIirK2tAQA2NjZITExEcXEx0tPToVQq5b4WLFiA4uJiHD58GCNGjJDL+Z539ywsLKDRaLBt2zYAXJfmQKvVIjc3F5IkQa1WA+D7WHPg4OCAzZs3o6ioCIWFhejfv3+LWRezf44uo3mEhYWFKCkpEZ6ensLa2lpkZ2eLXr16mX1ef7YYPHiwUKlUIi8vTy5bvHixmD9/vgAg5s+fL2JiYgQAMXr0aPHDDz8IAOLxxx8X6enpAoBwcnISpaWlwsnJSTg6OorS0lLh6OgoAIiMjAzx+OOPCwDihx9+EKNGjTL7NTf36Ny5s1CpVAKAaNu2rThy5Ijo1asX16UZRJs2bQQAYWVlJdLT08Xjjz8uNm7cKAIDAwUA8cUXX4jp06cLAOKNN94QX3zxhQAgAgMDRWJiogAgevXqJbKzs4WNjY3w8PAQJSUlwsLCgu959xhz5swR69atE9u2bRMAuC7NILRarXB2dtYp4/uY+WPt2rVi6tSpAoCwtrYWDg4OLWVdzD4BRjOJ/v37i127dsnHCxYsEAsWLDD7vP6MoVQqdZKMw4cPi86dOwug/g/ew4cPCwDiyy+/FEFBQXrtgoKCxJdffimX32jXuXNnUVRUJJff2o5xe7F161bx1FNPcV2aUbRq1UpkZWWJfv36iaqqKmFpaSkA3fetXbt2if79+wsAwtLSUlRVVQlA/73sRju+5919uLm5iZ9++kkMHTpUTjK4LuYPQ0kG38fMG/b29qKsrEyvvCWsC2+XIpmbmxtOnDghH5eXl8PNzc2MM2o5XFxccPr0aQDA6dOn4eLiAsD4mjRUXl5erldOt0+pVEKlUiEjI4Pr0gxYWFhAkiRUVlYiJSUFpaWl+P3331FXVwdA97W8+fWvq6vD+fPn4ezsfMfrRY1bsmQJ5s2bh+vXrwMAnJ2duS7NgBACP/74IzIzMxEaGgqA/38xN09PT1RVVSEhIQEajQbx8fFo3bp1i1gXJhlEzZAQwtxTaJHatGmDpKQkzJ49GzU1NXr1XJf77/r161CpVHB3d0e/fv3wl7/8xdxTavHGjh2LysrKB/rz+/+sBg0ahL59+2L06NGYMWMGBg8erNeG72P3l5WVFfr06YMvvvgCffr0wYULF7BgwQK9dn/GdWGSQbKTJ0+iS5cu8rG7uztOnjxpxhm1HBUVFejcuTMAoHPnzqisrARgfE0aKnd3d9crp8ZZWVkhKSkJ69atQ3JyMgCuS3Ny/vx57N27FwMGDICjoyMsLS0B6L6WN7/+lpaWcHBwQHV19R2vFzVs4MCBePbZZ6HVapGYmIhhw4bhs88+47o0A7/99hsAoKqqCsnJyejXrx/fx8ysvLwc5eXlOHToEADgP//5D/r06dNi1sXs92wxmkdYWlqK0tJS4eHhIT9s98gjj5h9Xn/GuPWZjI8++kjnAbDFixcLAGLMmDE6D4BlZGQIoP4BsLKyMuHo6CgcHR1FWVmZcHJyEoD+A2CjR482+/U+CPHVV1+JuLg4nTKui3mjQ4cOwsHBQQAQdnZ2Ii0tTYwdO1Zs2rRJ5wHjN954QwAQf//733UeMN64caMAIB555BGdB4xLS0uFhYUF3/NMEEOGDJGfyeC6mDdat24t2rZtK/+8f/9+MXLkSL6PNYNIS0sTPXv2FABEZGSk+Oijj1rKuph9AoxmFKNHjxZHjhwRJSUl4h//+IfZ5/NnjPXr14vffvtN1NbWihMnTohXX31VtG/fXvz000/i6NGjIiUlRX7jACCWLVsmSkpKRG5urujbt69c/sorr4ji4mJRXFwspkyZIpf37dtX5OXliZKSErF06VKzX++DEAMHDhRCCJGTkyMkSRKSJInRo0dzXcwcjz76qNBoNCInJ0fk
5eWJf/3rXwKA8PT0FBkZGaK4uFhs2rRJ2NjYCADC1tZWbNq0SRQXF4uMjAzh6ekp9/WPf/xDlJSUiMOHD+t88grf8+4tbk4yuC7mDU9PT5GdnS2ys7NFfn6+/Lrxfcz84evrK9RqtcjJyRHJycnC0dGxRayL4v//QEREREREZBJ8JoOIiIiIiEyKSQYREREREZkUkwwiIiIiIjIpJhlERERERGRSTDKIiIiIiMikmGQQETUDQgh8/PHH8nFYWBgiIyNN0ndCQgICAgJM0ldDJk6ciMLCQuzZs0enXKlUIjg4uMnHb8kWLVqE4cOHm3saBvn6+mL06NHmngYR3WdMMoiImoHLly9jwoQJcHZ2NvdUdNz4BufbMXXqVISGhmLYsGE65R4eHnjhhRdMPTWzUSgUDR6bQ2RkJH7++WdzT8Ogxx57DGPGjDH3NIjoPmOSQUTUDFy7dg0rV67EnDlz9Opu3YmoqakBAAwZMgSpqanYunUrSktLER0djRdeeAEZGRnIzc3Fww8/LJ/z1FNPQa1W48iRIxg7diwAwMLCAh999BEOHTqEnJwcvP7663K/aWlp+O6771BYWKg3n6CgIOTm5iIvLw8xMTEAgH/9618YNGgQVq9ejY8++kinfUxMDAYPHgxJkjB79mwolUqkpaUhKysLWVlZGDBgAID6P9aXL1+OoqIi/Pjjj9ixY4d83dHR0SgoKEBOTg5iY2P15uTk5ITk5GTk5OTg4MGDePTRRwEAbdq0wZo1a5Cbm4ucnBxMmDABADBy5EhkZWUhOzsbP/30E4D6P9TDwsLkPvPy8qBUKqFUKnH48GF89dVXyM/Px+DBg3WOu3Tpgrlz58qv48KFCwHU7+AUFhZi5cqVyM/Px+7du2FnZwcA6NatG1JSUpCdnY2srCx5rQz107p1a2zfvh3Z2dnIy8vD888/3+DviFarxcKFC5GVlYXc3Fx4eXnptTe2BjczNm6fPn2QmpqKzMxM7Nq1C507dwYA7N27FzExMcjIyMCRI0cwaNAgWFtb47333kNgYCAkScLzzz+P1q1bY/Xq1cjIyIBGo8Gzzz4LAAgJCUFSUhJ27tyJo0ePYvHixfJcDK2XsX6IqPkw+zcCMhgMRkuPmpoa0a5dO6HVaoW9vb0ICwsTkZGRAoBISEgQAQEBOm2B+m9bPnfunOjcubOwsbER5eXlYuHChQKAeOutt0RcXJx8/s6dO4VCoRDdu3cXJ06cELa2tiI0NFT885//FACEjY2NUKvVwsPDQwwZMkT88ccfwsPDQ2+erq6u4tixY6JDhw7C0tJS/Pzzz2LcuHECgNi7d6/Ot9PeiJu/FRqAaNWqlbC1tRUARPfu3YVarRYAREBAgNixY4dQKBTCxcVFnD17VgQEBIj27duLw4cPy+c7ODjojfH555+Ld999VwAQQ4cOFZIkCQAiJiZGfh0ACEdHR9GhQwdx/Phx+fpufNNuZGSkCAsLk9vm5eUJpVIplEqlqKurE48//rgAoHf89NNPixUrVggAQqFQiG3btonBgwcLpVIprl69Knx9fQUAsXHjRjF58mQBQKSnp4vx48cLoP4bsVu1amW0nwkTJoiVK1fK87K3t9e7/pt/R7RarZg5c6YAIN544w0RHx+v197YGtwchsa1srIS+/fvFx06dBAAxPPPPy9Wr14tr//HH38sgPpv7E5JSREAREhIiM63EH/wwQfy6+Dg4CCOHDkiWrduLUJCQkRpaamwt7cXtra24tdffxXu7u5G18tYP+b+b5nBYNSHFYiIqFmoqanB119/jbfeeguXLl26rXPUajVOnz4NACgtLcWPP/4IoP5f4YcOHSq327RpE4QQKCkpQVlZGf7yl79gxIgR6N27NyZOnAgAcHBwQI8ePVBbW4tDhw7h119/1RvPz88PqampOHPmDABg3bp1eOKJJ/Ddd9/d9nVaW1tj2bJleOyxx1BXV4eePXsCAAYNGoTNmzdDCIGKigrs3bsXAHD+
/HlcvnwZq1evxvbt27F9+3a9PgcNGiT/S/7evXvh7OyMdu3a4amnnkJQUJDc7vfff8ff/vY3pKWlydd37ty5Rud87NgxZGRkGDweMWIERowYAUmSAABt27ZFjx49cPz4cWi1WuTk5AAAsrKy4OHhgbZt28LNzQ1bt24FAFy5cqXBfvbt24dPPvkEMTEx2L59O3755ZdG57tlyxZ5zBu7NzcztgY3y8vL0xvX29sbPj4+SElJAVB/O92pU6cMjuvh4WFwbiNGjMCzzz6LuXPnAgDs7OzQtWtXAMDPP/+M//73vwCAwsJCKJVKODk5GVwvY/0cPny40deHiJoekwwiomZkyZIl0Gg0SEhIkMuuXbsGC4v6u1sVCgVsbGzkuht/oALA9evX5ePr16/Dyup/b/FCCJ1xhBBQKBR488035cTkhiFDhuDChQumu6hbzJkzBxUVFfD19YWFhQUuX77cYPu6ujr069cPw4cPx8SJEzFz5swmecj55tcZgHxrEwC91+PmY4VCgejoaKxcuVKnjVKp1Fmfuro6tGrVyuj4xvoB6m9RGjNmDKKiovDzzz/j/fffb/BaboxbV1en83tww+2sQXFxsd64ycnJKCgogL+//12Ne+M6AwICcPToUZ3yxx9/XO/1MtZHQ/0QUfPAZzKIiJqRc+fOYdOmTZg6dapc9uuvv6Jv374AgGeffVYnybhdkyZNgkKhwMMPP4yHH34YR44cwe7du/HGG2/If8j16NEDrVu3brCfQ4cOYciQIXB2doaFhQWCg4Pxf//3fw2eU1NTg3bt2snHDg4OOHXqFIQQeOmll+Tx9+/fj4CAACgUCnTq1AlPPvkkgPrnKhwcHLBz507MmTMHvr6+emPs27cPkydPBlCfJJ05cwY1NTVISUnBjBkz5HaOjo5IT0/HE088If9Lu5OTE4D617lPnz4AAJVKBU9Pzwav64bdu3fj1VdfRZs2bQAADz30EDp27Gi0/R9//IHy8nKMGzcOAGBjY4NWrVoZ7cfV1RUXL17EunXrEBsbK8/xXhhbg5sZGvfIkSPo2LEj+vfvDwCwsrLCI4880uBYt67/7t278eabb8rHjz32WIPnG1uvO+2HiO4v7mQQETUzn3zyCWbOnCkfx8fH47vvvkN2djZ27dqFP/744477PH78OA4dOgR7e3tMnz4dV65cwapVq+Dh4QGNRgOFQoGqqiqMHz++wX5Onz6NBQsWYO/evVAoFNixYwe+//77Bs/Jzc1FXV0dsrOzsXbtWvz73/9GUlISXn75ZZ3rSUpKwvDhw1FYWIgTJ05Ao9Hg/PnzaNeuHb777jvY2dlBoVDg7bff1htj4cKFWLNmDXJycnDx4kWEhIQAAKKiorB8+XLk5eWhrq4OixYtQnJyMl5//XVs2bIFFhYWqKysxIgRI+Q55efnIyMj47b/hTwlJQW9evXCwYMHAdQnES+++CLq6uqMnvPSSy9hxYoVeO+993D16lVMmjTJaD/du3dHbGwsrl+/jqtXr+KNN964rXk1xNga3OzRRx/VG/fq1auYOHEiPv/8czg4OMDKygpLliwx+AEBN+zduxcLFiyAJEmIjo7G+++/jyVLliA3NxcWFhbQarV45plnjJ5/5swZg+t1p/0Q0f2lQP3DGURERGbXpk0bXLhwAe3bt8ehQ4cwcOBAVFRUmHtaRER0h7iTQUREzcb27dvh6OgIGxsbvP/++0wwiIgeUNzJICIiIiIik+KD30REREREZFJMMoiIiIiIyKSYZBARERERkUkxySAiIiIiIpNikkFERERERCb1/wCaBpYG4v6GCQAAAABJRU5ErkJggg==\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YPCRkkm768sx"
+ },
+ "source": [
+ "\n",
+ "## Conclusion after analysis"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BhDBB8847Dev"
+ },
+ "source": [
+ "### Vocab\n",
+ "1. The vocab size is 6710. This is not a large dataset.\n",
+ "2. Our dataset is comprised of short sentences, with an average length of 10 and a median length of 9; the length ranges from 1 to 47. Only a small number of sentences have a length greater than 30 => set the max length equal to 30 => need a lot of padding tokens.\n",
+ "3. 50% of the words in the vocabulary occur only once, but they could be people's names, so let's keep them.\n",
+ "4. Year: can be replaced by a common `YEAR` token\n",
+ "5. Number in text: can be replaced by a common `NUM` token\n",
+ "6. Lemmatization: *films* to *film*\n",
+ "7. All words are in lowercase.\n",
+ "8. No punctuation.\n",
+ "9. No informal text.\n",
+ "10. No abbreviations (like *'ll, can't, 2morrow*)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fRaEnZIe8UWy"
+ },
+ "source": [
+ "### Tags\n",
+ "1. Most of the tags are short, with a median and average length of 2 words. The longest tag has 16 words; this could be a movie name.\n",
+ "2. Most sentences are about movies and ratings.\n",
+ "3. There are 25 classes of entities, divided into 3 categories: B tags (beginning of an entity), I tags (intermediate part of an entity), and the O tag (no entity). The proportions of B, I and O tags are about 21.40%, 17.28% and 61.32%, respectively.\n",
+ "4. Most of the tags are in the minority and O is the most common entity => need to over-sample the tags from the minority groups.\n",
+ "5. As shown in section 2.7 (Check the Imbalance in train/test dataset), the percentage of sentences that only contain O tags is 0.59%. This is a small amount, so we don't need to delete those sentences.\n",
+ "6. Also shown in section 2.7 (Check the Imbalance in train/test dataset), the percentage of OOV tokens in the test set is 3.54%.\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Dtr2P3ZE3sE6"
+ },
+ "source": [
+ "\n",
+ "# Part 2: Pre-process the data\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MRofcZfAUd9g"
+ },
+ "source": [
+ "\n",
+ "## 2.1 Stemming"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1J6BsedjUeOf"
+ },
+ "source": [
+ "def stem_sentence(sentence):\n",
+ " sentence = sentence.split(' ')\n",
+ " stemmer = PorterStemmer()\n",
+ " result = [stemmer.stem(word) for word in sentence]\n",
+ " stemmed_sentence = ' '.join(result)\n",
+ " return stemmed_sentence"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4p6nLSAePE9U"
+ },
+ "source": [
+ "\n",
+ "## 2.2 Lemmatization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "reh2ym9rO2PM",
+ "outputId": "e58500ab-a58e-4abe-ab2c-48e905ed18d5"
+ },
+ "source": [
+ "nltk.download('punkt')\n",
+ "nltk.download('wordnet')\n",
+ "def lemmatize_sentence(sentence):\n",
+ " tokenization = nltk.word_tokenize(sentence)\n",
+ " lemmatizer = WordNetLemmatizer()\n",
+ " result = [lemmatizer.lemmatize(word) for word in tokenization]\n",
+ " lemmatized_sentence = ' '.join(result)\n",
+ " return lemmatized_sentence"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+ "[nltk_data] Unzipping tokenizers/punkt.zip.\n",
+ "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/wordnet.zip.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Hm9UnKBqUeZA"
+ },
+ "source": [
+ "\n",
+ "## 2.3 Replacement"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "EZzhc1zBUeiv"
+ },
+ "source": [
+ "import re\n",
+ "\n",
+ "def replace(sentence, to_replace, replace_by):\n",
+ " replaced = sentence.replace(to_replace, replace_by)\n",
+ " return replaced\n",
+ "\n",
+ "def replace_num(sentence):\n",
+ " replaced = re.sub(r'^\\d{1,2}$', \"NUM\", sentence) # replace 1, 2 digits\n",
+ " replaced = re.sub(r'^\\d{4}$', \"YEAR\", replaced) # replace year\n",
+ " replaced = re.sub(r'^\\d{4}s$', \"YEAR\", replaced) # replace year\n",
+ " return replaced\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vfswr-bAXwl5"
+ },
+ "source": [
+ "\n",
+ "## 2.4 Pre-processing pipeline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "uvGcbsdOXw3N"
+ },
+ "source": [
+ "def apply_preproc(data_generator):\n",
+ " data_generator = list(map(lambda x: replace(x, \"ca n t\",\"cannot\"), data_generator))\n",
+ " data_generator = list(map(lambda x: replace(x, \"ll\",\"will\"), data_generator))\n",
+ " data_generator = list(map(lambda x: replace_num(x), data_generator))\n",
+ " data_generator = list(map(lambda x: lemmatize_sentence(x), data_generator))\n",
+ " return data_generator\n",
+ "\n",
+ "processed_sentences = apply_preproc(sentences)\n",
+ "processed_test_sentences = apply_preproc(test_sentences)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WMK0gaxoOi97"
+ },
+ "source": [
+ "\n",
+ "## 2.5 Split to train/val datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "yjPJLq1I0_CX"
+ },
+ "source": [
+ "split_ratio = 0.8\n",
+ "\n",
+ "def train_val_split(data, label, ratio, shuffle=True, random_seed=33):\n",
+ " length = len(data)\n",
+ " lines_index = [*range(length)] \n",
+ " # shuffle the indexes if shuffle is set to True\n",
+ " rnd.seed(random_seed)\n",
+ " if shuffle:\n",
+ " rnd.shuffle(lines_index)\n",
+ " split_point = int(length * ratio)\n",
+ "\n",
+ " train_data = []\n",
+ " train_label = []\n",
+ " val_data = []\n",
+ " val_label = []\n",
+ " for i in range(length):\n",
+ " if i <= split_point:\n",
+ " train_data.append(data[lines_index[i]])\n",
+ " train_label.append(label[lines_index[i]])\n",
+ " else:\n",
+ " val_data.append(data[lines_index[i]])\n",
+ " val_label.append(label[lines_index[i]])\n",
+ " return train_data, train_label, val_data, val_label\n",
+ "\n",
+ "\n",
+ "train_sentences, train_tags, val_sentences, val_tags = \\\n",
+ " train_val_split(processed_sentences, tags, split_ratio)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3Sot8kCJltNu"
+ },
+ "source": [
+ "\n",
+ "## 2.6 Tokenization and Padding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "GDfsVDq9KxgV"
+ },
+ "source": [
+ "oov_tok = \"\"\n",
+ "trunc_type='post'\n",
+ "pad_type='post'\n",
+ "max_length = 50\n",
+ "\n",
+ "vocab_tokenizer = Tokenizer(oov_token=oov_tok)\n",
+ "vocab_tokenizer.fit_on_texts(train_sentences)\n",
+ "\n",
+ "vocab = vocab_tokenizer.word_index\n",
+ "reverse_vocab = dict([(value, key) for (key, value) in vocab.items()])\n",
+ "vocab_size = len(vocab)\n",
+ "\n",
+ "\n",
+ "train_sequences = vocab_tokenizer.texts_to_sequences(train_sentences)\n",
+ "val_sequences = vocab_tokenizer.texts_to_sequences(val_sentences)\n",
+ "test_sequences = vocab_tokenizer.texts_to_sequences(processed_test_sentences)\n",
+ "\n",
+ "train_padded_sequences = pad_sequences(train_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)\n",
+ "\n",
+ "val_padded_sequences = pad_sequences(val_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)\n",
+ "\n",
+ "test_padded_sequences = pad_sequences(test_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "CP_DmX44K24X",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "c581b751-a8a5-438a-cdde-53305a7d09a9"
+ },
+ "source": [
+ "tag_tokenizer = Tokenizer(filters=\".\", lower=False, oov_token=oov_tok)\n",
+ "tag_tokenizer.fit_on_texts(train_tags)\n",
+ "\n",
+ "tag_map = tag_tokenizer.word_index\n",
+ "reverse_tag_map = dict([(value, key) for (key, value) in tag_map.items()])\n",
+ "tag_size = len(tag_map)\n",
+ "\n",
+ "train_tag_sequences = tag_tokenizer.texts_to_sequences(train_tags)\n",
+ "val_tag_sequences = tag_tokenizer.texts_to_sequences(val_tags)\n",
+ "test_tag_sequences = tag_tokenizer.texts_to_sequences(test_tags)\n",
+ "\n",
+ "\n",
+ "train_padded_tags = pad_sequences(train_tag_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)\n",
+ "\n",
+ "val_padded_tags = pad_sequences(val_tag_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)\n",
+ "\n",
+ "test_padded_tags = pad_sequences(test_tag_sequences,\n",
+ " maxlen=max_length, \n",
+ " truncating=trunc_type, \n",
+ " padding=pad_type)\n",
+ "\n",
+ "print(\"\\nExample of a a sentence and its tokenized, padded version\")\n",
+ "print(train_sentences[0])\n",
+ "print(train_padded_sequences[0])\n",
+ "print(\"\\nExample of a list of tags in a sentence and its tokenized, padded version\")\n",
+ "print(train_tags[0])\n",
+ "print(train_padded_tags[0])\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Example of a a sentence and its tokenized, padded version\n",
+ "what is the movie triangle\n",
+ "[ 5 7 2 3 1866 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0]\n",
+ "\n",
+ "Example of a list of tags in a sentence and its tokenized, padded version\n",
+ "O O O O B-TITLE\n",
+ "[2 2 2 2 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VmNBER6vh8fZ"
+ },
+ "source": [
+ "\n",
+ "## 2.7 Check the Imbalance in train/test dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "1JkHts8fDElj",
+ "outputId": "594fd0fd-2ea0-4688-b005-be211a32076f"
+ },
+ "source": [
+ "#Percentage of B, I and O Tags in train dataset\n",
+ "def get_tag_proportion(series_tags):\n",
+ " '''\n",
+ " Input:\n",
+ " series_tags - pd.Series of tags\n",
+ " Output:\n",
+ " [print] - B, I and O tags' proportion\n",
+ " '''\n",
+ " tags_list=[tag for tag in series_tags]\n",
+ " counter=dict(Counter(tags_list))\n",
+ " beg = 0\n",
+ " inter = 0\n",
+ " out = 0\n",
+ " for key, value in counter.items():\n",
+ " if key.startswith(\"B\"):\n",
+ " beg += value\n",
+ " elif key.startswith(\"I\"):\n",
+ " inter += value\n",
+ " else:\n",
+ " out += value\n",
+ " total = len(tags_list)\n",
+ " print(\"B tags proportion = {0:.2%}\".format(round(beg/total,4)))\n",
+ " print(\"I tags proportion = {0:.2%}\".format(round(inter/total,4)))\n",
+ " print(\"O tags proportion = {0:.2%}\".format(round(out/total,4)))\n",
+ "\n",
+ "get_tag_proportion(df[\"Tag\"])\n",
+ "\n",
+ "# Percentage of sentences that only contain O tags\n",
+ "# If this percentage > 50% => the dataset is imbalanced => drop empty sentences\n",
+ "def get_empty_tag_sentence_proportion(list_tag_sequence):\n",
+ " '''\n",
+ " Input:\n",
+ " list_tag_sequence - list of tag sequences in train/test set\n",
+ " Output:\n",
+ " [print] - Percentage of sentences that only contain O tags\n",
+ " '''\n",
+ " count = 0\n",
+ " for seq in list_tag_sequence:\n",
+ " if sum(seq) == 2 * len(seq): # if seq contains only 2 (token for O tag)\n",
+ " count += 1\n",
+ "\n",
+ " \n",
+ " print(\"\\nPercentage of sentences that only contain O tags -> {0:.2%}\".\\\n",
+ " format(round(count/len(list_tag_sequence),4)))\n",
+ " \n",
+ "get_empty_tag_sentence_proportion(train_tag_sequences)\n",
+ "\n",
+ "def get_OOV_density(list_token_sequence):\n",
+ " '''\n",
+ " Input:\n",
+ " list_token_sequence - list of token sequences in test set\n",
+ " Output:\n",
+ " [print] - Percentage of OOV token in the test set\n",
+ " '''\n",
+ " list_token_sequence = [token for seq in list_token_sequence for token in seq]\n",
+ " counter=dict(Counter(list_token_sequence))\n",
+ " print(\"\\nPercentage of OOV tokens in test set -> {0:.2%}\".\\\n",
+ " format(round(counter[1]/len(list_token_sequence),4)))\n",
+ "\n",
+ "get_OOV_density(test_sequences)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "B tags proportion = 21.40%\n",
+ "I tags proportion = 17.28%\n",
+ "O tags proportion = 61.32%\n",
+ "\n",
+ "Percentage of sentences that only contain O tags -> 0.59%\n",
+ "\n",
+ "Percentage of OOV tokens in test set -> 3.54%\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0OZsbuw2iDAp"
+ },
+ "source": [
+ "\n",
+ "## 2.8 One-hot encoding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "qdsnDiIZwGaC"
+ },
+ "source": [
+ "# One-hot encoding\n",
+ "train_padded_tags = np.array([to_categorical(tags, num_classes = tag_size+1) \\\n",
+ " for tags in train_padded_tags])\n",
+ "val_padded_tags = np.array([to_categorical(tags, num_classes = tag_size+1) \\\n",
+ " for tags in val_padded_tags])\n",
+ "test_padded_tags = np.array([to_categorical(tags, num_classes = tag_size+1) \\\n",
+ " for tags in test_padded_tags])\n",
+ "\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "84RwGepu37jO"
+ },
+ "source": [
+ "\n",
+ "# Part 3: Building the model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yX9PMBUtuJmJ"
+ },
+ "source": [
+ "\n",
+ "## 3.1 Glove Embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "3QAO1_GehxQ1",
+ "outputId": "58d403c6-9566-4f90-d5e2-6e882c568a51"
+ },
+ "source": [
+ "!mkdir -p /glove_embedding\n",
+ "# Download data\n",
+ "!wget --no-check-certificate \\\n",
+ "http://nlp.stanford.edu/data/glove.6B.zip -O /glove_embedding/glove.6B.zip\n",
+ "!unzip /glove_embedding/glove.6B.zip -d /glove_embedding\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "--2021-06-03 08:19:24-- http://nlp.stanford.edu/data/glove.6B.zip\n",
+ "Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140\n",
+ "Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.\n",
+ "HTTP request sent, awaiting response... 302 Found\n",
+ "Location: https://nlp.stanford.edu/data/glove.6B.zip [following]\n",
+ "--2021-06-03 08:19:24-- https://nlp.stanford.edu/data/glove.6B.zip\n",
+ "Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.\n",
+ "HTTP request sent, awaiting response... 301 Moved Permanently\n",
+ "Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]\n",
+ "--2021-06-03 08:19:25-- http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip\n",
+ "Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22\n",
+ "Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 862182613 (822M) [application/zip]\n",
+ "Saving to: ‘/glove_embedding/glove.6B.zip’\n",
+ "\n",
+ "/glove_embedding/gl 100%[===================>] 822.24M 5.16MB/s in 2m 42s \n",
+ "\n",
+ "2021-06-03 08:22:07 (5.09 MB/s) - ‘/glove_embedding/glove.6B.zip’ saved [862182613/862182613]\n",
+ "\n",
+ "Archive: /glove_embedding/glove.6B.zip\n",
+ " inflating: /glove_embedding/glove.6B.50d.txt \n",
+ " inflating: /glove_embedding/glove.6B.100d.txt \n",
+ " inflating: /glove_embedding/glove.6B.200d.txt \n",
+ " inflating: /glove_embedding/glove.6B.300d.txt \n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "u2SIlF-NeR_o",
+ "outputId": "b3d92d12-4e8c-470c-e694-33e6fc5c6fbc"
+ },
+ "source": [
+ "# Load the GloVe vectors and build the embedding matrix for the notebook vocabulary.\n",
+ "GLOVE_DIR = \"/glove_embedding\"\n",
+ "embedding_dim = 300\n",
+ "embeddings_index = {}\n",
+ "\n",
+ "# GloVe ships as UTF-8 text: name the encoding instead of relying on the platform default.\n",
+ "glove_path = os.path.join(GLOVE_DIR, 'glove.6B.{}d.txt'.format(embedding_dim))\n",
+ "with open(glove_path, encoding='utf-8') as f:\n",
+ "    for line in f:\n",
+ "        # each line holds a word followed by its vector coefficients\n",
+ "        word, *coefs = line.split()\n",
+ "        embeddings_index[word] = np.asarray(coefs, dtype='float32')\n",
+ "\n",
+ "print('Found %s word vectors.' % len(embeddings_index))\n",
+ "\n",
+ "# Row i receives the vector of the vocab word with index i; rows of words missing from\n",
+ "# GloVe (and any index no word uses) stay all-zeros.\n",
+ "embedding_matrix = np.zeros((len(vocab) + 1, embedding_dim))\n",
+ "hits = misses = 0\n",
+ "for word, i in vocab.items():\n",
+ "    embedding_vector = embeddings_index.get(word)\n",
+ "    if embedding_vector is not None:\n",
+ "        embedding_matrix[i] = embedding_vector\n",
+ "        hits += 1\n",
+ "    else:\n",
+ "        misses += 1\n",
+ "print(\"Converted %d words (%d misses)\" % (hits, misses))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Found 400000 word vectors.\n",
+ "Converted 5008 words (574 misses)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3h-sT21kjV2X"
+ },
+ "source": [
+ "\n",
+ "## 3.2 Define the model "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "tshH4jK03oWM",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "67b6f8ca-5ce7-4a8f-d677-003aa2a2a847"
+ },
+ "source": [
+ "# Model architecture\n",
+ "batch_size = 32\n",
+ "embedding_dim = 300\n",
+ "max_length = 50\n",
+ "\n",
+ "def BiLSTM(vocab_size=vocab_size, tag_size=tag_size, hidden_size = 32,\n",
+ "           embedding_dim=embedding_dim):\n",
+ "    \"\"\"Build and compile the tagger: frozen pre-trained embedding, BiLSTM, Dense(relu), softmax.\n",
+ "\n",
+ "    The +1 on vocab_size and tag_size makes room for id 0, which the notebook uses for padding.\n",
+ "    hidden_size sets the LSTM units per direction and the width of the intermediate Dense layer.\n",
+ "    Reads the notebook-level max_length and embedding_matrix.\n",
+ "    Returns a compiled Model that outputs tag_size+1 class probabilities per position.\n",
+ "    \"\"\"\n",
+ "    token_ids = Input(shape = (max_length,))\n",
+ "\n",
+ "    # The pre-trained vectors stay fixed; padding is not masked (mask_zero=False).\n",
+ "    embedded = Embedding(input_dim = vocab_size+1, output_dim = embedding_dim,\n",
+ "                         input_length = max_length,\n",
+ "                         embeddings_initializer=Constant(embedding_matrix),\n",
+ "                         trainable=False, mask_zero = False)(token_ids)\n",
+ "    recurrent = LSTM(units = hidden_size, return_sequences=True, recurrent_dropout=0.1)\n",
+ "    encoded = Bidirectional(recurrent)(embedded)\n",
+ "    projected = TimeDistributed(Dense(hidden_size, activation=\"relu\"))(encoded)\n",
+ "    tag_probs = Dense(tag_size+1, activation='softmax')(projected)\n",
+ "\n",
+ "    tagger = Model(inputs=token_ids, outputs=tag_probs)\n",
+ "    tagger.compile(optimizer=\"RMSprop\", loss = tf.keras.losses.categorical_crossentropy,\n",
+ "                   metrics=['accuracy'])\n",
+ "    return tagger\n",
+ "\n",
+ "model = BiLSTM(vocab_size=vocab_size, tag_size=tag_size, hidden_size = 32,\n",
+ "               embedding_dim=embedding_dim)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n",
+ "WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n",
+ "WARNING:tensorflow:Layer lstm will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sXWo98J3jgLQ"
+ },
+ "source": [
+ "\n",
+ "## 3.3 Callbacks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "d2hDC-w7DGvu"
+ },
+ "source": [
+ "class myCallback(tf.keras.callbacks.Callback):\n",
+ "    \"\"\"Stop training once val_accuracy exceeds 95%; a missing metric (no validation data) counts as 0.\"\"\"\n",
+ "    def on_epoch_end(self, epoch, logs=None):\n",
+ "        if (logs or {}).get('val_accuracy', 0) > 0.95:\n",
+ "            print(\"\\nReached 95% accuracy so cancelling training!\")\n",
+ "            self.model.stop_training = True\n",
+ "\n",
+ "checkpointer = ModelCheckpoint(filepath = 'NER_BiLSTM.h5',\n",
+ " verbose = 0,\n",
+ " mode = 'auto',\n",
+ " save_best_only = True,\n",
+ " monitor='val_loss')\n",
+ "\n",
+ "earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, \n",
+ " verbose=0, mode='auto', \n",
+ " baseline=None, restore_best_weights=True)\n",
+ "\n",
+ "initial_learning_rate = 0.001\n",
+ "epochs = 10\n",
+ "decay = initial_learning_rate / epochs\n",
+ "def lr_time_based_decay(epoch, lr):\n",
+ " return lr * 1 / (1 + decay * epoch)\n",
+ "\n",
+ "lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_time_based_decay, verbose=1)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gKnkkiSS4EwI"
+ },
+ "source": [
+ "\n",
+ "# Part 4: Train the Model \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "vQ7uIYmgX3SC",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "10c23a4d-c31e-45d3-9dbb-51b322b3ef9d"
+ },
+ "source": [
+ "num_epochs = 15\n",
+ "history = model.fit(train_padded_sequences, train_padded_tags, \n",
+ " batch_size=batch_size, epochs=num_epochs, \n",
+ " validation_data= (val_padded_sequences, val_padded_tags),\n",
+ " callbacks=[checkpointer, earlystopper, lr_scheduler])\n",
+ "\n",
+ "model.summary()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Epoch 1/15\n",
+ "\n",
+ "Epoch 00001: LearningRateScheduler reducing learning rate to 0.0010000000474974513.\n",
+ "245/245 [==============================] - 118s 452ms/step - loss: 0.4174 - accuracy: 0.9329 - val_loss: 0.1493 - val_accuracy: 0.9618\n",
+ "Epoch 2/15\n",
+ "\n",
+ "Epoch 00002: LearningRateScheduler reducing learning rate to 0.0009999000574917021.\n",
+ "245/245 [==============================] - 110s 449ms/step - loss: 0.1187 - accuracy: 0.9688 - val_loss: 0.1074 - val_accuracy: 0.9719\n",
+ "Epoch 3/15\n",
+ "\n",
+ "Epoch 00003: LearningRateScheduler reducing learning rate to 0.000999700106714659.\n",
+ "245/245 [==============================] - 110s 448ms/step - loss: 0.0902 - accuracy: 0.9760 - val_loss: 0.0922 - val_accuracy: 0.9759\n",
+ "Epoch 4/15\n",
+ "\n",
+ "Epoch 00004: LearningRateScheduler reducing learning rate to 0.0009994003415259673.\n",
+ "245/245 [==============================] - 110s 447ms/step - loss: 0.0762 - accuracy: 0.9794 - val_loss: 0.0825 - val_accuracy: 0.9782\n",
+ "Epoch 5/15\n",
+ "\n",
+ "Epoch 00005: LearningRateScheduler reducing learning rate to 0.0009990007918579775.\n",
+ "245/245 [==============================] - 109s 445ms/step - loss: 0.0676 - accuracy: 0.9818 - val_loss: 0.0756 - val_accuracy: 0.9802\n",
+ "Epoch 6/15\n",
+ "\n",
+ "Epoch 00006: LearningRateScheduler reducing learning rate to 0.0009985014876310735.\n",
+ "245/245 [==============================] - 109s 443ms/step - loss: 0.0614 - accuracy: 0.9836 - val_loss: 0.0713 - val_accuracy: 0.9816\n",
+ "Epoch 7/15\n",
+ "\n",
+ "Epoch 00007: LearningRateScheduler reducing learning rate to 0.0009979026914447063.\n",
+ "245/245 [==============================] - 110s 449ms/step - loss: 0.0564 - accuracy: 0.9849 - val_loss: 0.0708 - val_accuracy: 0.9817\n",
+ "Epoch 8/15\n",
+ "\n",
+ "Epoch 00008: LearningRateScheduler reducing learning rate to 0.0009972046657933619.\n",
+ "245/245 [==============================] - 110s 449ms/step - loss: 0.0523 - accuracy: 0.9861 - val_loss: 0.0684 - val_accuracy: 0.9821\n",
+ "Epoch 9/15\n",
+ "\n",
+ "Epoch 00009: LearningRateScheduler reducing learning rate to 0.0009964075567443476.\n",
+ "245/245 [==============================] - 109s 444ms/step - loss: 0.0491 - accuracy: 0.9869 - val_loss: 0.0671 - val_accuracy: 0.9825\n",
+ "Epoch 10/15\n",
+ "\n",
+ "Epoch 00010: LearningRateScheduler reducing learning rate to 0.0009955116266172385.\n",
+ "245/245 [==============================] - 108s 440ms/step - loss: 0.0461 - accuracy: 0.9876 - val_loss: 0.0676 - val_accuracy: 0.9823\n",
+ "Epoch 11/15\n",
+ "\n",
+ "Epoch 00011: LearningRateScheduler reducing learning rate to 0.0009945171376267872.\n",
+ "245/245 [==============================] - 107s 436ms/step - loss: 0.0433 - accuracy: 0.9884 - val_loss: 0.0704 - val_accuracy: 0.9814\n",
+ "Epoch 12/15\n",
+ "\n",
+ "Epoch 00012: LearningRateScheduler reducing learning rate to 0.000993424351882975.\n",
+ "245/245 [==============================] - 107s 437ms/step - loss: 0.0412 - accuracy: 0.9889 - val_loss: 0.0664 - val_accuracy: 0.9832\n",
+ "Epoch 13/15\n",
+ "\n",
+ "Epoch 00013: LearningRateScheduler reducing learning rate to 0.0009922336476668565.\n",
+ "245/245 [==============================] - 107s 435ms/step - loss: 0.0389 - accuracy: 0.9896 - val_loss: 0.0649 - val_accuracy: 0.9835\n",
+ "Epoch 14/15\n",
+ "\n",
+ "Epoch 00014: LearningRateScheduler reducing learning rate to 0.00099094540310837.\n",
+ "245/245 [==============================] - 108s 440ms/step - loss: 0.0373 - accuracy: 0.9900 - val_loss: 0.0642 - val_accuracy: 0.9838\n",
+ "Epoch 15/15\n",
+ "\n",
+ "Epoch 00015: LearningRateScheduler reducing learning rate to 0.0009895599961864132.\n",
+ "245/245 [==============================] - 109s 444ms/step - loss: 0.0352 - accuracy: 0.9905 - val_loss: 0.0668 - val_accuracy: 0.9838\n",
+ "Model: \"model\"\n",
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "input_1 (InputLayer) [(None, 50)] 0 \n",
+ "_________________________________________________________________\n",
+ "embedding (Embedding) (None, 50, 300) 1674900 \n",
+ "_________________________________________________________________\n",
+ "bidirectional (Bidirectional (None, 50, 64) 85248 \n",
+ "_________________________________________________________________\n",
+ "time_distributed (TimeDistri (None, 50, 32) 2080 \n",
+ "_________________________________________________________________\n",
+ "dense_1 (Dense) (None, 50, 27) 891 \n",
+ "=================================================================\n",
+ "Total params: 1,763,119\n",
+ "Trainable params: 88,219\n",
+ "Non-trainable params: 1,674,900\n",
+ "_________________________________________________________________\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "M3vHe1vaC-fG",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6a982b63-5732-4ef2-a3b2-db581ec329d8"
+ },
+ "source": [
+ "history.history.keys()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'])"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 24
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "K-LuVEjYZ1qg",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 73
+ },
+ "outputId": "8c62eb5c-3ef0-4630-829d-8b52aaea2086"
+ },
+ "source": [
+ "acc = history.history['accuracy']\n",
+ "val_acc = history.history['val_accuracy']\n",
+ "loss = history.history['loss']\n",
+ "val_loss = history.history['val_loss']\n",
+ "plt.figure(figsize = (8,8))\n",
+ "epochs = range(1, len(acc) + 1)  # 1-based epoch numbers, reused by the loss plot\n",
+ "plt.plot(epochs, acc, 'o', label='Training acc')  # colour comes from the active style; hard-coded white is invisible on a light background\n",
+ "plt.plot(epochs, val_acc, '-', label='Validation acc')\n",
+ "plt.gca().set(title='Training and validation accuracy', xlabel='Epoch', ylabel='Accuracy')\n",
+ "plt.legend()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 25
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_orawE5havtv",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 499
+ },
+ "outputId": "fc17f13c-d589-45a5-aa37-17dab1091f93"
+ },
+ "source": [
+ "plt.figure(figsize = (8,8))\n",
+ "plt.plot(epochs, loss, 'o', label='Training loss')  # colour comes from the active style; hard-coded white is invisible on a light background\n",
+ "plt.plot(epochs, val_loss, '-', label='Validation loss')\n",
+ "plt.gca().set(title='Training and validation loss', xlabel='Epoch', ylabel='Loss')\n",
+ "plt.legend()\n",
+ "plt.show()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-ygvHcHU4Nkj"
+ },
+ "source": [
+ "\n",
+ "# Part 5: Test the model\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "SYIAQDjhsHNQ"
+ },
+ "source": [
+ "# Collapse the class axis: per-token class scores / one-hot rows (3D) become tag ids (2D).\n",
+ "test_tag_probs = model.predict(test_padded_sequences)\n",
+ "test_padded_tags_pred = test_tag_probs.argmax(axis=-1)\n",
+ "test_padded_tags_true = test_padded_tags.argmax(axis=-1)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "aS4oaqkBE4XX"
+ },
+ "source": [
+ "# Convert the index to tag, keeping real (non-padded) token positions only.\n",
+ "def ids_to_tags(padded_tag_ids, padded_true_ids):\n",
+ "    \"\"\"Map tag ids to tag names, dropping positions whose true id is 0 (padding).\n",
+ "\n",
+ "    Padded positions dominated the recorded report (97514 of 122200 samples), inflating\n",
+ "    accuracy and weighted averages and raising seqeval's 'PAD seems not to be NE tag' warning.\n",
+ "    A predicted id 0 on a real token is still rendered as \"PAD\" so that the error stays visible.\n",
+ "    \"\"\"\n",
+ "    return [[reverse_tag_map[tag_id] if tag_id != 0 else \"PAD\"\n",
+ "             for tag_id, true_id in zip(row, true_row) if true_id != 0]\n",
+ "            for row, true_row in zip(padded_tag_ids, padded_true_ids)]\n",
+ "\n",
+ "test_tags_pred = ids_to_tags(test_padded_tags_pred, test_padded_tags_true)\n",
+ "test_tags_true = ids_to_tags(test_padded_tags_true, test_padded_tags_true)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "OO5Pg9r2MmV8",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "5b9d58d6-77dc-4381-8a74-3e26c8dea358"
+ },
+ "source": [
+ "print(\"Micro F1-score is : {:.1%}\".format(f1_score(test_tags_true, test_tags_pred)))\n",
+ "print(\"Micro Precision-score is : {:.1%}\".format(precision_score(test_tags_true, test_tags_pred)))\n",
+ "print(\"Micro Recall-score is : {:.1%}\".format(recall_score(test_tags_true, test_tags_pred)))\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PAD seems not to be NE tag.\n",
+ " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Micro F1-score is : 82.8%\n",
+ "Micro Precision-score is : 82.0%\n",
+ "Micro Recall-score is : 83.7%\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "znKBJcadMr9l",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "6952c03e-ac24-44e3-8cba-752c94c6f27d"
+ },
+ "source": [
+ "!pip install -q sklearn_crfsuite==0.3.6\n",
+ "from sklearn_crfsuite.metrics import flat_classification_report\n",
+ "# Token-level precision/recall/F1 per tag; the package is pinned to the version of the recorded run.\n",
+ "report = flat_classification_report(y_pred=test_tags_pred, y_true=test_tags_true)\n",
+ "print(report)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: sklearn_crfsuite in /usr/local/lib/python3.7/dist-packages (0.3.6)\n",
+ "Requirement already satisfied: python-crfsuite>=0.8.3 in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (0.9.7)\n",
+ "Requirement already satisfied: tqdm>=2.0 in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (4.41.1)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (1.15.0)\n",
+ "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (0.8.9)\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+ " _warn_prf(average, modifier, msg_start, len(result))\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " B-ACTOR 0.89 0.92 0.91 812\n",
+ " B-CHARACTER 0.70 0.49 0.58 90\n",
+ " B-DIRECTOR 0.90 0.83 0.87 456\n",
+ " B-GENRE 0.91 0.94 0.92 1117\n",
+ " B-PLOT 0.74 0.58 0.65 491\n",
+ " B-RATING 0.97 0.97 0.97 500\n",
+ "B-RATINGS_AVERAGE 0.91 0.88 0.90 451\n",
+ " B-REVIEW 0.67 0.04 0.07 56\n",
+ " B-SONG 0.85 0.52 0.64 54\n",
+ " B-TITLE 0.79 0.75 0.77 562\n",
+ " B-TRAILER 0.84 0.90 0.87 30\n",
+ " B-YEAR 0.95 0.94 0.94 720\n",
+ " I-ACTOR 0.91 0.89 0.90 862\n",
+ " I-CHARACTER 0.57 0.39 0.46 75\n",
+ " I-DIRECTOR 0.89 0.85 0.87 496\n",
+ " I-GENRE 0.86 0.72 0.78 222\n",
+ " I-PLOT 0.75 0.40 0.52 496\n",
+ " I-RATING 0.97 0.87 0.92 226\n",
+ "I-RATINGS_AVERAGE 0.86 0.81 0.84 403\n",
+ " I-REVIEW 0.25 0.02 0.04 45\n",
+ " I-SONG 0.76 0.67 0.71 119\n",
+ " I-TITLE 0.74 0.81 0.77 856\n",
+ " I-TRAILER 0.00 0.00 0.00 8\n",
+ " I-YEAR 0.96 0.97 0.96 610\n",
+ " O 0.95 0.97 0.96 14929\n",
+ " PAD 1.00 1.00 1.00 97514\n",
+ "\n",
+ " accuracy 0.98 122200\n",
+ " macro avg 0.79 0.70 0.72 122200\n",
+ " weighted avg 0.98 0.98 0.98 122200\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "HE7Q2oD2M9rL",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "d2c6e203-d751-4b46-c631-b5b4c165fdc3"
+ },
+ "source": [
+ "# Show one randomly chosen test sentence with its true and predicted tags (changes on every run).\n",
+ "i = np.random.randint(0,test_padded_sequences.shape[0]) # random index in [0, number of test sentences)\n",
+ "p = np.argmax(model.predict(np.array([test_padded_sequences[i]])), axis=-1)\n",
+ "true = np.argmax(test_padded_tags[i], -1)\n",
+ "def tag_name(tag_id):\n",
+ "    \"\"\"Name of a tag id; id 0 is shown as \"PAD\", mirroring the special case in the id-to-tag conversion cell.\"\"\"\n",
+ "    return reverse_tag_map[tag_id] if tag_id != 0 else \"PAD\"\n",
+ "\n",
+ "print(\"Sample number {} of {} (Test Set)\".format(i, test_padded_sequences.shape[0]))\n",
+ "print(\"{:20}||{:20}||{}\".format(\"Word\", \"True\", \"Pred\"))\n",
+ "print(60 * \"=\")\n",
+ "for word, tag, pred in zip(test_padded_sequences[i], true, p[0]):\n",
+ "    if word != 0:\n",
+ "        print(\"{:20}: {:20} {}\".format(reverse_vocab[word], tag_name(tag), tag_name(pred)))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Sample number 1175 of 2444 (Test Set)\n",
+ "Word ||True ||Pred\n",
+ "============================================================\n",
+ "did : O O\n",
+ "steven : B-DIRECTOR B-DIRECTOR\n",
+ "spielberg : I-DIRECTOR I-DIRECTOR\n",
+ "direct : O O\n",
+ "any : O O\n",
+ "horror : B-GENRE B-GENRE\n",
+ "movie : O O\n",
+ "in : O O\n",
+ "the : O O\n",
+ "1980 : B-YEAR B-YEAR\n",
+ "s : I-YEAR I-YEAR\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sNtJLetx4Sb_"
+ },
+ "source": [
+ "\n",
+ "# Part 6: Test with your own sentence"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "V0niUrOnVskX",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 722
+ },
+ "outputId": "51453fb1-958b-4b99-c405-6b8e4a705b57"
+ },
+ "source": [
+ "# Tag free-text sentences with the trained model.\n",
+ "\n",
+ "original_to_test = [\"is michael scofield the protagonist in prison break\",\n",
+ "                    \"what is the highest rated romantic movie in all time\"]\n",
+ "\n",
+ "to_test = apply_preproc(original_to_test)\n",
+ "\n",
+ "# The tokenizer must NOT be re-fitted here: fit_on_texts would give unseen words new ids beyond the\n",
+ "# embedding table (the former 'indices[0,2] = 5583 is not in [0, 5583)' error). Use the training vocabulary as is.\n",
+ "to_test = vocab_tokenizer.texts_to_sequences(to_test)\n",
+ "\n",
+ "to_test = pad_sequences(to_test,\n",
+ "                        maxlen=max_length,\n",
+ "                        truncating=trunc_type,\n",
+ "                        padding=pad_type)\n",
+ "\n",
+ "to_test_tag_pred = np.argmax(model.predict(to_test), axis=-1)\n",
+ "\n",
+ "for i, row in enumerate(to_test_tag_pred):\n",
+ "    print(\"\\n{:20}||{}\".format(\"Word\", \"Pred\"))\n",
+ "    print(40 * \"=\")\n",
+ "    # NOTE(review): pairing by position assumes post-padding and one token per whitespace-separated word - confirm.\n",
+ "    words = original_to_test[i].split(' ')\n",
+ "    # zip stops after the last word of the sentence, so trailing padding positions are skipped\n",
+ "    for word, pred in zip(words, row):\n",
+ "        if pred != 0:\n",
+ "            print(\"{:20}: {}\".format(word, reverse_tag_map[pred]))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "InvalidArgumentError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m padding=pad_type)\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mto_test_tag_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mto_test_tag_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_test_tag_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1725\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1726\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_predict_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1727\u001b[0;31m \u001b[0mtmp_batch_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1728\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1729\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 887\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 889\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 890\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 891\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[0;31m# In this case we have not created variables on the first call. So we can\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 923\u001b[0m \u001b[0;31m# run the first trace but we should fail if variables are created.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 924\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 925\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_created_variables\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 926\u001b[0m raise ValueError(\"Creating variables on a non-first call to a function\"\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 3022\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[1;32m 3023\u001b[0m return graph_function._call_flat(\n\u001b[0;32m-> 3024\u001b[0;31m filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0m\u001b[1;32m 3025\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3026\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1959\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1960\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[0;32m-> 1961\u001b[0;31m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[1;32m 1962\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m 1963\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 596\u001b[0;31m ctx=ctx)\n\u001b[0m\u001b[1;32m 597\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 598\u001b[0m outputs = execute.execute_with_cancellation(\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0;32m---> 60\u001b[0;31m inputs, attrs, num_outputs)\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mInvalidArgumentError\u001b[0m: 2 root error(s) found.\n (0) Invalid argument: indices[0,2] = 5583 is not in [0, 5583)\n\t [[node model/embedding/embedding_lookup (defined at :2) ]]\n (1) Invalid argument: indices[0,2] = 5583 is not in [0, 5583)\n\t [[node model/embedding/embedding_lookup (defined at :2) ]]\n\t [[model/embedding/embedding_lookup/_6]]\n0 successful operations.\n0 derived errors ignored. [Op:__inference_predict_function_17969]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node model/embedding/embedding_lookup:\n model/embedding/embedding_lookup/17449 (defined at /usr/lib/python3.7/contextlib.py:112)\n\nInput Source operations connected to node model/embedding/embedding_lookup:\n model/embedding/embedding_lookup/17449 (defined at /usr/lib/python3.7/contextlib.py:112)\n\nFunction call stack:\npredict_function -> predict_function\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WZJApvzvaJcy"
+ },
+ "source": [
+ "\n",
+ "# Part 7: Analyse the incorrect predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "fb8q_rD4aNRH"
+ },
+ "source": [
+ "def get_incorrect(y_pred, y_true, X_test):\n",
+ "  '''Count token ids and true tag ids at positions where y_pred != y_true.\n",
+ "  Inputs are arrays of equal size; returns (token_counts, tag_counts) dicts.'''\n",
+ "  # flatten() returns a copy, so keep it: compare per token, not per sentence.\n",
+ "  y_pred = np.asarray(y_pred).flatten()\n",
+ "  y_true = np.asarray(y_true).flatten()\n",
+ "  X_test = np.asarray(X_test).flatten()\n",
+ "  where_incorrect = y_true != y_pred\n",
+ "  incorrect_tokens = dict(Counter(X_test[where_incorrect]))\n",
+ "  incorrect_tags = dict(Counter(y_true[where_incorrect]))\n",
+ "  return incorrect_tokens, incorrect_tags\n",
+ "\n",
+ "\n",
+ "# Tally the misclassified positions of the test set.\n",
+ "incorrect_tokens, incorrect_tags = get_incorrect(\n",
+ "    test_padded_tags_pred, test_padded_tags_true, test_padded_sequences)\n",
+ "\n",
+ "# Rank the (id, count) pairs from most to least frequent.\n",
+ "incorrect_tokens = sorted(incorrect_tokens.items(), key=lambda pair: pair[1], reverse=True)\n",
+ "incorrect_tags = sorted(incorrect_tags.items(), key=lambda pair: pair[1], reverse=True)\n",
+ "\n",
+ "def show_top(template, title, ranked, id_to_name):\n",
+ "  # Print a header and the non-zero ids among the first 20 ranked entries.\n",
+ "  print(template.format(title, \"Frequency\"))\n",
+ "  print(37 * \"=\")\n",
+ "  for key, freq in ranked[:20]:\n",
+ "    if key != 0:\n",
+ "      print(\"{:20}: {:15}\".format(id_to_name[key], freq))\n",
+ "\n",
+ "show_top(\"{:^20}||{:^15}\", \"Incorrect word\", incorrect_tokens, reverse_vocab)\n",
+ "show_top(\"\\n{:^20}||{:^15}\", \"Incorrect tag\", incorrect_tags, reverse_tag_map)\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5rOw_59Ztzy7"
+ },
+ "source": [
+ "## Conclusion after analysis "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NiAN2Zy--j-_"
+ },
+ "source": [
+ "1. Many of the incorrectly tagged words are stopwords, because they sometimes occur inside movie titles/plots. This is the problem of token-label inconsistency.\n",
+ "2. The macro-averaged Precision, Recall and F1-score are much lower than the micro-averaged ones, as a result of imbalanced classes.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "N2J2LDiBRkyq"
+ },
+ "source": [
+ "## Potential improvements"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9aBUF7SIHVhd"
+ },
+ "source": [
+ "1. To tackle the imbalanced-classes problem, use over-sampling to obtain more examples of tags from the minority classes.\n",
+ "2. To tackle label inconsistency, there are 3 possible solutions:\n",
+ "\n",
+ " * Use a larger context. For example, use longer sentences, or combine 2 or more sentences that have similar/correlated meanings.\n",
+ " * Use CRF decoder layer.\n",
+ " * Use Character/Subword-level encoders like ELMO, Flair, CNN and BERT. \n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NkJ8Go7BVVgz"
+ },
+ "source": [
+ "\n",
+ "# Export result to .tsv file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3EQxLZgzRdL9"
+ },
+ "source": [
+ "# Write 'tag<TAB>word' rows (ids equal to 0 are skipped) and an empty row\n",
+ "# after each sentence; newline='' is what the csv docs require for writers.\n",
+ "with open('/tmp/pred.tsv', 'wt', newline='') as out_file:\n",
+ "  tsv_writer = csv.writer(out_file, delimiter='\\t')\n",
+ "  for tag_row, word_row in zip(test_padded_tags_pred, test_padded_sequences):\n",
+ "    for pred, word in zip(tag_row, word_row):\n",
+ "      if pred != 0 and word != 0:\n",
+ "        tsv_writer.writerow([reverse_tag_map[pred], reverse_vocab[word]])\n",
+ "    tsv_writer.writerow([])"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file
diff --git a/NER_trivia_BiLSTM.ipynb b/NER_trivia_BiLSTM.ipynb
new file mode 100644
index 0000000..2c46a3c
--- /dev/null
+++ b/NER_trivia_BiLSTM.ipynb
@@ -0,0 +1,2104 @@
+{
+ "nbformat": 4,
+ "nbformat_minor": 0,
+ "metadata": {
+ "colab": {
+ "name": "NER_trivia_BiLSTM.ipynb",
+ "provenance": [],
+ "collapsed_sections": [],
+ "toc_visible": true
+ },
+ "kernelspec": {
+ "name": "python3",
+ "display_name": "Python 3"
+ },
+ "accelerator": "GPU"
+ },
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YsSB3CDt2NxF"
+ },
+ "source": [
+ "## Outline\n",
+ "- [Introduction](#0)\n",
+ " - [Import libraries](#0.1)\n",
+ "- [Part 1: Explore the data](#1)\n",
+ " - [1.1 Import the datasets](#1.1)\n",
+ " - [1.2 Exploratory Analysis](#1.2)\n",
+ " - [Conclusion after analysis](#1.3)\n",
+ " \n",
+ "- [Part 2: Pre-process the data](#2)\n",
+ " - [2.1 Stemming](#2.1)\n",
+ " - [2.2 Lemmatization](#2.2)\n",
+ " - [2.3 Replacement](#2.3)\n",
+ " - [2.4 Pre-processing pipeline](#2.4)\n",
+ " - [2.5 Split to train/val datasets](#2.5)\n",
+ " - [2.6 Tokenization and Padding](#2.6)\n",
+ " - [2.7 Check the Imbalance in train dataset](#2.7)\n",
+ " - [2.8 One-hot encoding](#2.8)\n",
+ "\n",
+ "- [Part 3: Build the model](#3)\n",
+ " - [3.1 Glove Embedding](#3.1)\n",
+ " - [3.2 Define the model](#3.2)\n",
+ " - [3.3 Callbacks](#3.3)\n",
+ " \n",
+ "\n",
+ "- [Part 4: Train the model](#4)\n",
+ "- [Part 5: Test the model](#5)\n",
+ "- [Part 6: Test with your own sentence](#6)\n",
+ "\n",
+ "- [Part 7: Analyse the incorrect predictions](#7)\n",
+ " - [Potential improvement](#7.1)\n",
+ "\n",
+ "- [Export result to .tsv file](#8)\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Uj9t5Iav2nzR"
+ },
+ "source": [
+ "\n",
+ "# Introduction\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "pZDTBRDfi6u7"
+ },
+ "source": [
+ "\n",
+ "## Import libraries"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "Ntqkdg4N3HW4",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "e3960f95-492d-40c7-f57a-1d0e39c11fcd"
+ },
+ "source": [
+ "!python --version\n",
+ "import os\n",
+ "\n",
+ "%tensorflow_version 2.x\n",
+ "import tensorflow as tf\n",
+ "print(tf.__version__)\n",
+ "\n",
+ "# build the tokenized sentences and tags\n",
+ "from tensorflow.keras.preprocessing.text import Tokenizer\n",
+ "from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
+ "\n",
+ "from tensorflow.keras.utils import to_categorical\n",
+ "from tensorflow.keras.initializers import Constant\n",
+ "from tensorflow.keras import Model\n",
+ "from tensorflow.keras.layers import Input, Embedding, Bidirectional, LSTM, \\\n",
+ "TimeDistributed, Dense, Dropout\n",
+ "from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, \\\n",
+ "LearningRateScheduler\n",
+ "\n",
+ "import numpy as np # linear algebra\n",
+ "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
+ "import matplotlib.pyplot as plt\n",
+ "%matplotlib inline\n",
+ "import seaborn as sns\n",
+ "from collections import Counter\n",
+ "import random as rnd\n",
+ "from nltk.corpus import stopwords\n",
+ "import nltk\n",
+ "nltk.download('stopwords')\n",
+ "from nltk.stem import WordNetLemmatizer \n",
+ "from nltk.stem import PorterStemmer\n",
+ "\n",
+ "!pip install sklearn_crfsuite\n",
+ "from sklearn_crfsuite.metrics import flat_classification_report\n",
+ "!pip install seqeval\n",
+ "from seqeval.metrics import precision_score, recall_score, f1_score, classification_report\n",
+ "import csv\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Python 3.7.10\n",
+ "2.5.0\n",
+ "[nltk_data] Downloading package stopwords to /root/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/stopwords.zip.\n",
+ "Collecting sklearn_crfsuite\n",
+ " Downloading https://files.pythonhosted.org/packages/25/74/5b7befa513482e6dee1f3dd68171a6c9dfc14c0eaa00f885ffeba54fe9b0/sklearn_crfsuite-0.3.6-py2.py3-none-any.whl\n",
+ "Requirement already satisfied: tqdm>=2.0 in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (4.41.1)\n",
+ "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (1.15.0)\n",
+ "Requirement already satisfied: tabulate in /usr/local/lib/python3.7/dist-packages (from sklearn_crfsuite) (0.8.9)\n",
+ "Collecting python-crfsuite>=0.8.3\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/79/47/58f16c46506139f17de4630dbcfb877ce41a6355a1bbf3c443edb9708429/python_crfsuite-0.9.7-cp37-cp37m-manylinux1_x86_64.whl (743kB)\n",
+ "\u001b[K |████████████████████████████████| 747kB 8.7MB/s \n",
+ "\u001b[?25hInstalling collected packages: python-crfsuite, sklearn-crfsuite\n",
+ "Successfully installed python-crfsuite-0.9.7 sklearn-crfsuite-0.3.6\n",
+ "Collecting seqeval\n",
+ "\u001b[?25l Downloading https://files.pythonhosted.org/packages/9d/2d/233c79d5b4e5ab1dbf111242299153f3caddddbb691219f363ad55ce783d/seqeval-1.2.2.tar.gz (43kB)\n",
+ "\u001b[K |████████████████████████████████| 51kB 4.5MB/s \n",
+ "\u001b[?25hRequirement already satisfied: numpy>=1.14.0 in /usr/local/lib/python3.7/dist-packages (from seqeval) (1.19.5)\n",
+ "Requirement already satisfied: scikit-learn>=0.21.3 in /usr/local/lib/python3.7/dist-packages (from seqeval) (0.22.2.post1)\n",
+ "Requirement already satisfied: scipy>=0.17.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.4.1)\n",
+ "Requirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.21.3->seqeval) (1.0.1)\n",
+ "Building wheels for collected packages: seqeval\n",
+ " Building wheel for seqeval (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
+ " Created wheel for seqeval: filename=seqeval-1.2.2-cp37-none-any.whl size=16184 sha256=9799e8a9d85ef24cf7d61633bec3a02b9574dda85b1785d301d2f0d8a3bbc28d\n",
+ " Stored in directory: /root/.cache/pip/wheels/52/df/1b/45d75646c37428f7e626214704a0e35bd3cfc32eda37e59e5f\n",
+ "Successfully built seqeval\n",
+ "Installing collected packages: seqeval\n",
+ "Successfully installed seqeval-1.2.2\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "T58oiLSU25Za"
+ },
+ "source": [
+ "\n",
+ "# Part 1: Explore the data"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WP5M3GnHxY-9"
+ },
+ "source": [
+ "\n",
+ "## 1.1 Import the datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "iHhxVylamxCG",
+ "outputId": "61a675f5-464b-4cf3-caf9-1f7ece62d62a"
+ },
+ "source": [
+ "# Create the data and model directories (-p: no error if they already exist)\n",
+ "!mkdir -p /data/eng\n",
+ "!mkdir -p /data/trivia10k13\n",
+ "!mkdir -p /model\n",
+ "\n",
+ "# Download the train/test .bio files; NOTE(review): --no-check-certificate skips TLS certificate validation\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/engtrain.bio \\\n",
+ "-O /data/eng/train.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/engtest.bio \\\n",
+ "-O /data/eng/test.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13train.bio \\\n",
+ "-O /data/trivia10k13/train.tsv\n",
+ "\n",
+ "!wget --no-check-certificate \\\n",
+ "https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13test.bio \\\n",
+ "-O /data/trivia10k13/test.tsv"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "--2021-06-02 17:10:01-- https://groups.csail.mit.edu/sls/downloads/movie/engtrain.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1013492 (990K)\n",
+ "Saving to: ‘/data/eng/train.tsv’\n",
+ "\n",
+ "/data/eng/train.tsv 100%[===================>] 989.74K 2.23MB/s in 0.4s \n",
+ "\n",
+ "2021-06-02 17:10:02 (2.23 MB/s) - ‘/data/eng/train.tsv’ saved [1013492/1013492]\n",
+ "\n",
+ "--2021-06-02 17:10:02-- https://groups.csail.mit.edu/sls/downloads/movie/engtest.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 252636 (247K)\n",
+ "Saving to: ‘/data/eng/test.tsv’\n",
+ "\n",
+ "/data/eng/test.tsv 100%[===================>] 246.71K 826KB/s in 0.3s \n",
+ "\n",
+ "2021-06-02 17:10:03 (826 KB/s) - ‘/data/eng/test.tsv’ saved [252636/252636]\n",
+ "\n",
+ "--2021-06-02 17:10:03-- https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13train.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 1785558 (1.7M)\n",
+ "Saving to: ‘/data/trivia10k13/train.tsv’\n",
+ "\n",
+ "/data/trivia10k13/t 100%[===================>] 1.70M 3.35MB/s in 0.5s \n",
+ "\n",
+ "2021-06-02 17:10:04 (3.35 MB/s) - ‘/data/trivia10k13/train.tsv’ saved [1785558/1785558]\n",
+ "\n",
+ "--2021-06-02 17:10:04-- https://groups.csail.mit.edu/sls/downloads/movie/trivia10k13test.bio\n",
+ "Resolving groups.csail.mit.edu (groups.csail.mit.edu)... 128.30.2.44\n",
+ "Connecting to groups.csail.mit.edu (groups.csail.mit.edu)|128.30.2.44|:443... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 438729 (428K)\n",
+ "Saving to: ‘/data/trivia10k13/test.tsv’\n",
+ "\n",
+ "/data/trivia10k13/t 100%[===================>] 428.45K 1.13MB/s in 0.4s \n",
+ "\n",
+ "2021-06-02 17:10:04 (1.13 MB/s) - ‘/data/trivia10k13/test.tsv’ saved [438729/438729]\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-D8_xEVDN-dj",
+ "outputId": "97f1f98b-0097-46e2-fabd-800939a857d0"
+ },
+ "source": [
+ "def get_sentence(file_path):\n",
+ "  '''\n",
+ "  Read a file of tag<TAB>word lines whose sentences are separated by blank lines.\n",
+ "  Input:\n",
+ "    file_path - path to the tsv file\n",
+ "  Output:\n",
+ "    sentences - list of sentences in string format\n",
+ "    tags - list of associated tags in string format\n",
+ "  '''\n",
+ "  sentences = []\n",
+ "  tags = []\n",
+ "  with open(file_path) as f:\n",
+ "    contents = f.read()\n",
+ "  for sen_tag in contents.split(\"\\n\\n\"):\n",
+ "    pairs = [line.split(\"\\t\") for line in sen_tag.split(\"\\n\") if line]\n",
+ "    # A chunk without lines (e.g. after the file's final blank line) is not\n",
+ "    # a sentence, so it must not add an empty entry.\n",
+ "    if not pairs:\n",
+ "      continue\n",
+ "    sentences.append(' '.join(pair[1] for pair in pairs))\n",
+ "    tags.append(' '.join(pair[0] for pair in pairs))\n",
+ "\n",
+ "  return sentences, tags\n",
+ "\n",
+ "\n",
+ "# Locations of the trivia10k13 splits on disk\n",
+ "train_path = \"/data/trivia10k13/train.tsv\"\n",
+ "test_path = \"/data/trivia10k13/test.tsv\"\n",
+ "# Parse each split into parallel lists of sentence strings and tag strings\n",
+ "(sentences, tags), (test_sentences, test_tags) = map(get_sentence, (train_path, test_path))\n",
+ "\n",
+ "print(\"The train dataset has {} sentences.\".format(len(sentences)))\n",
+ "print(\"The test dataset has {} sentences.\".format(len(test_sentences)))"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "The train dataset has 7817 sentences.\n",
+ "The test dataset has 1954 sentences.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "SJyYqFkQNCoW"
+ },
+ "source": [
+ "\n",
+ "## 1.2 Exploratory Analysis"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 198
+ },
+ "id": "Q6KzO6mq8326",
+ "outputId": "1e12ffca-e377-4ad9-a60c-a5aa2df4a1bb"
+ },
+ "source": [
+ "# Load the raw train file into a two-column frame and preview its first rows\n",
+ "df = pd.read_csv(train_path, sep=\"\\t\", names=[\"Tag\", \"Word\"])\n",
+ "df.head()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Tag | \n",
+ " Word | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " B-Actor | \n",
+ " steve | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " I-Actor | \n",
+ " mcqueen | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " O | \n",
+ " provided | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " O | \n",
+ " a | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " B-Plot | \n",
+ " thrilling | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Tag Word\n",
+ "0 B-Actor steve\n",
+ "1 I-Actor mcqueen\n",
+ "2 O provided\n",
+ "3 O a\n",
+ "4 B-Plot thrilling"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 4
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "tBVUmUQtFRJ1"
+ },
+ "source": [
+ "\n",
+ "### 1.2.1 Sentence Length \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 584
+ },
+ "id": "h2J38zVa-wkD",
+ "outputId": "c4f9ef1d-a3ab-46e5-9ba3-380b9f3f4495"
+ },
+ "source": [
+ "plt.style.use(\"dark_background\")\n",
+ "# How long are the sentences?\n",
+ "def plot_sentence_length_histogram(list_sentences):\n",
+ "  '''\n",
+ "  Plot the distribution of sentence lengths and print summary statistics.\n",
+ "  Input:\n",
+ "    list_sentences - a list of sentences, tokens separated by one space\n",
+ "  Output:\n",
+ "    [print] - Min, Max, Median and Average value of sentence length\n",
+ "    [plot] - Histogram plot of sentence length\n",
+ "  '''\n",
+ "  token_counts = [len(sentence.split(' ')) for sentence in list_sentences]\n",
+ "  fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "  sns.histplot(token_counts)\n",
+ "  plt.xlabel(\"Number of tokens in a sentence\")\n",
+ "  plt.ylabel(\"Number of occurrences\")\n",
+ "  summary = [(\"Min: \", np.min(token_counts)), (\"Max: \", np.max(token_counts)),\n",
+ "             (\"Median: \", np.median(token_counts)),\n",
+ "             (\"Average: \", round(np.mean(token_counts), 2))]\n",
+ "  for label, value in summary:\n",
+ "    print(label, value)\n",
+ "\n",
+ "plot_sentence_length_histogram(sentences)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Min: 1\n",
+ "Max: 71\n",
+ "Median: 19.0\n",
+ "Average: 20.32\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "qY8VCLDQhCbJ"
+ },
+ "source": [
+ "### 1.2.2 Entity Length"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 584
+ },
+ "id": "XQrFzlrkhCm3",
+ "outputId": "592c48e6-e95d-40d0-ee98-5f0b3fd25b3a"
+ },
+ "source": [
+ "# How long are the entities?\n",
+ "def plot_entity_length_histogram(series):\n",
+ "  '''\n",
+ "  Plot the distribution of entity lengths and print summary statistics.\n",
+ "  Input:\n",
+ "    series - a pandas series of the tags\n",
+ "  Output:\n",
+ "    [print] - Min, Max, Median and Average value of entity length\n",
+ "    [plot] - Histogram plot of entity length\n",
+ "  '''\n",
+ "  tag_length = []\n",
+ "  current_length = 0\n",
+ "  for tag in series:\n",
+ "    if tag.startswith(\"B\"):\n",
+ "      tag_length.append(current_length)\n",
+ "      current_length = 1\n",
+ "    elif tag.startswith(\"I\"):\n",
+ "      current_length += 1\n",
+ "  # Also store the final entity, then drop the placeholder recorded at the first \"B\".\n",
+ "  tag_length.append(current_length)\n",
+ "  tag_length = tag_length[1:]\n",
+ "  fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "  sns.histplot(tag_length)\n",
+ "  plt.xlabel(\"Number of tokens in a tag\")\n",
+ "  plt.ylabel(\"Number of occurrences\")\n",
+ "  print(\"Min: \",np.min(tag_length))\n",
+ "  print(\"Max: \",np.max(tag_length))\n",
+ "  print(\"Median: \",np.median(tag_length))\n",
+ "  print(\"Average: \",round(np.mean(tag_length),2))\n",
+ "\n",
+ "plot_entity_length_histogram(df[\"Tag\"])"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Min: 1\n",
+ "Max: 44\n",
+ "Median: 2.0\n",
+ "Average: 4.47\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "OcoUQIfwfOAe"
+ },
+ "source": [
+ "### 1.2.3 Token frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "b2kvMJJ3KvXu",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 567
+ },
+ "outputId": "e4a479bf-ef36-40d7-8a47-d2003e0eae9c"
+ },
+ "source": [
+ "def plot_top_non_stopwords_barchart(series, top=20, word=True):\n",
+ "  '''\n",
+ "  Count the items of a series and plot the most frequent non-stopwords.\n",
+ "  Input:\n",
+ "    series - a pd.Series of words or tags\n",
+ "    top - number of most common words to plot\n",
+ "    word - True if series holds words, False for tags (only changes the labels)\n",
+ "  Output:\n",
+ "    [print] - No of distinct items and the full item -> count mapping\n",
+ "    [plot] - Barchart of most common words' occurrence\n",
+ "    [return] - dict mapping every item to its number of occurrences\n",
+ "  '''\n",
+ "  stop = set(stopwords.words('english'))\n",
+ "  value = 'words' if word else 'tags'\n",
+ "  counter = Counter(list(series))\n",
+ "  print(\"There are {} distinct {} in dataset\".format(len(counter), value))\n",
+ "  print(dict(counter))\n",
+ "  x, y = [], []\n",
+ "  for item, count in counter.most_common():\n",
+ "    if item not in stop:\n",
+ "      x.append(count)\n",
+ "      y.append(item)\n",
+ "    if len(x) == top:\n",
+ "      break\n",
+ "  fig, ax = plt.subplots(figsize=(11.7, 8.27))  # A4 landscape, in inches\n",
+ "  sns.barplot(x=x, y=y)\n",
+ "  plt.xlabel(\"Number of {} occurrences in a sentence\".format(value))\n",
+ "  # value is already plural, so no extra \"s\" is appended here.\n",
+ "  plt.ylabel(\"Most common {}\".format(value))\n",
+ "  return dict(counter)\n",
+ "\n",
+ "word_counter = plot_top_non_stopwords_barchart(df[\"Word\"], top=40)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "There are 10987 distinct words in dataset\n",
+ "{'steve': 85, 'mcqueen': 12, 'provided': 6, 'a': 7798, 'thrilling': 7, 'motorcycle': 8, 'chase': 19, 'in': 3435, 'this': 1962, 'greatest': 25, 'of': 4255, 'all': 177, 'ww': 5, '2': 43, 'prison': 52, 'escape': 35, 'movies': 80, 'liza': 3, 'minnelli': 7, 'and': 4049, 'joel': 16, 'gray': 5, 'won': 100, 'oscars': 22, 'for': 781, 'their': 346, 'roles': 25, '1972': 23, 'movie': 3924, 'that': 2194, 'follows': 64, 'nightclub': 2, 'entertainers': 1, 'berlin': 8, 'as': 1373, 'the': 8372, 'nazis': 16, 'come': 49, 'to': 2536, 'power': 25, 'what': 4273, 'is': 3785, 'tom': 147, 'hanks': 81, 'julia': 30, 'roberts': 26, 'about': 1849, 'who': 1716, 'plays': 175, 'down': 87, 'on': 1130, 'his': 1169, 'luck': 5, 'average': 9, 'guy': 118, 'goes': 117, 'back': 137, 'college': 50, 'gets': 125, 'taught': 7, 'by': 1411, 'making': 22, 'fun': 21, 'macgyver': 2, 're': 20, 'enacting': 1, 'scenes': 14, 'similar': 5, 'i': 706, 'am': 259, 'thinking': 595, 'an': 981, 'animated': 459, 'film': 2298, 'based': 554, 'classic': 661, 'theodor': 2, 'geisel': 2, 'children': 124, 's': 1590, 'novel': 168, 'young': 377, 'boy': 228, 'quest': 32, 'save': 98, 'trees': 8, '1981': 21, 'feature': 66, 'starring': 1044, 'mel': 62, 'gibson': 41, 'takes': 140, 'place': 109, 'post': 35, 'apocalyptic': 27, 'world': 269, 'australia': 5, 'steven': 119, 'speilberg': 8, 'supernatural': 21, 'story': 377, 'haunted': 12, 'house': 52, 'made': 117, 'many': 64, 'filmgoers': 1, 'afraid': 7, 'clowns': 1, '1997': 21, 'there': 49, 'scene': 53, 'featuring': 146, 'muscians': 1, 'playing': 55, 'deck': 1, 'ship': 47, 'with': 1113, 'julie': 21, 'andre': 4, 'where': 724, 'she': 136, 'sings': 5, 'flies': 2, 'umbrella': 9, 'dick': 13, 'van': 10, 'dyke': 2, 'jbiebs': 1, 'does': 191, 'concerts': 1, 'was': 757, 'banksy': 2, 'star': 186, 'director': 76, 'doing': 12, 'before': 43, 'he': 468, 'created': 28, 'john': 216, 'travolta': 38, 'had': 113, 'one': 275, 'jam': 1, 'saving': 18, 'rod': 4, 'into': 237, 'woman': 224, 'chest': 5, '1959': 18, 
'american': 356, 'thriller': 178, 'directed': 695, 'alfred': 39, 'hitchcock': 52, 'cary': 30, 'grant': 31, 'eva': 7, 'marie': 3, 'saint': 4, 'm': 360, '80': 48, 'comedy': 646, 'stars': 589, 'dustin': 20, 'hoffman': 23, 'terri': 3, 'garr': 4, 'character': 278, 'dresses': 5, 'up': 265, 'land': 53, 'job': 37, 'teenage': 24, 'getting': 21, 'bit': 6, 'radioactive': 2, 'arachnid': 2, 'gaining': 2, 'super': 50, 'powers': 26, 'blockbuster': 39, 'science': 116, 'fiction': 116, 'will': 79, 'smith': 22, 'tommy': 13, 'lee': 91, 'jones': 37, 'undercover': 20, 'agents': 11, 'fight': 68, 'intergalactic': 5, 'beings': 7, 'which': 776, 'little': 95, 'girl': 270, 'accidentally': 23, 'lands': 12, 'top': 22, 'witch': 24, 'witches': 10, 'sister': 32, 'vows': 2, 'revenge': 41, 'baseball': 46, 'dennis': 10, 'quaid': 7, 'true': 74, 'pitcher': 3, 'jim': 47, 'morris': 4, '1939': 31, 'adaptation': 67, '1000': 4, 'plus': 2, 'page': 2, 'book': 182, 'clark': 25, 'gable': 14, 'vivien': 6, 'leigh': 11, 'inspired': 34, 'very': 70, 'famous': 271, 'song': 47, 'beatles': 10, 'written': 128, 'paul': 83, 'mccartney': 2, 'wimpy': 1, 'main': 80, 'befriends': 29, 'fierce': 2, 'animal': 31, 'dismay': 1, 'viking': 13, 'community': 17, 'did': 126, 'billy': 38, 'bob': 18, 'thornton': 4, 'win': 43, 'oscar': 136, 'writing': 11, '1979': 24, 'musical': 164, 'loosely': 26, 'life': 297, 'career': 16, 'fosse': 3, 'while': 79, 'shooting': 5, 'pivotal': 3, 'counterculture': 2, 'newman': 22, 'said': 8, 'have': 153, 'eaten': 1, 'over': 93, 'fifty': 2, 'boiled': 1, 'eggs': 3, 'single': 17, 'afternoon': 1, '2012': 295, 'revived': 1, 'pie': 2, 'chain': 3, 'bringing': 6, 'jason': 66, 'biggs': 1, 'alyson': 1, 'hannigan': 1, 'most': 95, 'rest': 5, 'cast': 60, 'from': 625, 'original': 33, 'graphic': 11, 'actor': 71, 'mickey': 22, 'rourke': 7, 'features': 458, 'others': 24, 'such': 16, 'jessica': 16, 'alba': 4, 'carla': 1, 'gugino': 1, 'bruce': 81, 'willis': 65, 'tobey': 5, 'maguire': 6, '2000': 33, 'two': 294, 'warriors': 13, 
'chasing': 9, 'stolen': 10, 'sword': 16, 'run': 30, 'skilled': 4, 'nobleman': 2, 'daughter': 52, 'leo': 9, 'dicaprio': 49, 'enters': 5, 'human': 47, 'mind': 22, 'through': 74, 'dreams': 34, 'lets': 4, 'three': 104, 'strangers': 8, 'escort': 1, 'her': 401, 'see': 31, 'strange': 26, 'gentleman': 2, 'fond': 2, 'magic': 22, 'tricks': 2, 'released': 88, '1967': 21, 'featured': 175, 'major': 19, 'portraying': 11, 'group': 207, 'enlisted': 3, 'convicts': 3, 'wwii': 24, 'led': 11, 'marvin': 4, 'award': 95, 'winning': 64, 'spielberg': 113, 'army': 24, 'captain': 28, 'mission': 26, 'rescue': 23, 'sole': 1, 'survivor': 6, 'four': 49, 'brothers': 77, 'after': 202, 'invasion': 27, 'normandy': 7, 'luke': 23, 'skywalker': 21, 'breaking': 3, 'free': 22, 'aunt': 2, 'uncle': 18, 'princess': 60, 'leia': 6, 'off': 142, 'rhyme': 2, 'couple': 73, 'go': 97, 'hill': 44, 'middle': 33, 'aged': 11, 'decides': 33, 'embark': 8, 'adventure': 113, 'bali': 2, 'im': 23, 'manhattan': 9, 'are': 217, 'suddenly': 7, 'dealing': 14, 'issue': 2, 'employment': 2, 'move': 25, 'love': 279, 'rules': 7, 'fuck': 1, 'you': 132, 'thunder': 10, 'your': 17, 'just': 29, 'gods': 13, 'farts': 1, 'seemingly': 4, 'unrelated': 1, 'characters': 82, 'being': 111, 'isolated': 9, 'tortured': 3, 'villain': 40, 'jigsaw': 6, 'name': 870, 'clown': 6, 'fish': 20, 'has': 436, 'lost': 41, 'son': 90, 'fannie': 1, 'flagg': 1, 'tandy': 2, 'kathy': 4, 'bates': 11, 'tells': 161, 'tale': 61, 'strong': 11, 'bond': 30, 'between': 69, 'depression': 16, 'era': 33, 'women': 53, 'want': 32, 'jonah': 28, 'bring': 22, 'alcoholic': 6, 'english': 19, 'rockstar': 2, 'playboy': 6, 'nuisance': 1, 'destination': 2, 'set': 193, 'time': 194, 'psychopath': 3, 'norman': 4, 'shower': 14, 'cinematic': 2, 'history': 15, 'street': 25, 'urchin': 2, 'meets': 35, 'jasmine': 3, 'city': 83, 'they': 173, 'each': 60, 'other': 100, 'but': 143, 'can': 117, 'only': 79, 'marry': 10, 'prince': 48, 'myth': 4, 'gangster': 33, 'keiser': 1, 'soze': 1, 'recounted': 2, 
'different': 49, 'perspectives': 3, 'jeremy': 12, 'renner': 9, 'sergeant': 4, 'bomb': 13, 'squad': 6, 'unit': 10, 'during': 161, 'iraq': 10, 'war': 260, 'iconic': 25, 'epic': 101, 'crime': 144, 'man': 514, 'offer': 4, 'ca': 17, 'n': 92, 't': 97, 'refuse': 4, 'literary': 7, 'emily': 25, 'bronte': 2, 'named': 132, 'heathcliff': 2, 'gene': 44, 'kelly': 15, 'friends': 159, 'struggling': 13, 'find': 139, 'work': 60, 'paris': 17, 'things': 26, 'become': 67, 'more': 42, 'complicated': 3, 'when': 145, 'them': 86, 'fall': 49, 'same': 103, 'chris': 42, 'pine': 9, 'hardy': 12, 'cia': 42, 'fighting': 61, 'adventures': 33, 'both': 50, 'giants': 1, 'small': 85, 'people': 180, 'these': 23, 'came': 35, 'out': 216, '2010': 363, 'starred': 305, 'jake': 29, 'gyllenhaal': 13, 'pg': 2, '13': 2, 'disney': 259, 'unappreciated': 1, 'video': 26, 'games': 12, 'leaves': 10, 'game': 43, 'rambunctious': 1, 'rock': 59, 'spin': 15, 'forgetting': 3, 'sarah': 13, 'marshall': 15, 'quentin': 47, 'tarantino': 58, 'jewelry': 4, 'heist': 17, 'wrong': 27, 'j': 52, 'r': 31, 'tolkien': 15, 'hobbits': 4, 'wizards': 8, 'elves': 2, 'dark': 33, 'suspense': 15, 'horror': 238, 'blowing': 6, 'portrait': 3, 'mentally': 10, 'ill': 10, 'retelling': 17, 'fairytale': 8, 'falling': 28, 'at': 216, 'end': 40, '1982': 37, 'also': 69, 'james': 160, 'earl': 6, 'thulsa': 2, 'doom': 5, 'first': 207, 'box': 25, 'office': 25, 'success': 9, 'austrian': 3, 'bodybuilder': 1, 'arnold': 49, 'schwarzenegger': 46, 'taking': 21, 'involves': 76, 'priest': 6, 'old': 125, 'removing': 2, 'demon': 6, 'spirit': 8, 'theater': 13, 'experience': 10, 'equally': 2, 'loved': 6, 'hated': 1, 'pop': 8, 'icon': 3, 'canada': 3, 'some': 65, 'say': 11, 'looks': 6, 'like': 61, 'ellen': 10, 'degeneres': 3, 'filmed': 22, 'around': 150, 'chicago': 18, 'acclaimed': 15, 'leonardo': 54, 'tries': 80, 'fix': 3, '1995': 53, 'detectives': 16, 'following': 20, 'serial': 53, 'killer': 128, 'showcases': 6, 'dante': 3, 'deadly': 31, 'sins': 15, '2011': 301, 'stoner': 
6, 'latest': 13, 'instalment': 2, 'series': 208, 'cho': 2, 'kal': 2, 'penn': 15, 'perfect': 15, 'christmas': 76, 'tree': 6, 'scary': 21, 'returns': 25, 'town': 99, 'kill': 51, 'everyone': 17, 'born': 9, 'day': 89, 'supposedly': 3, 'died': 5, 'sequel': 132, '1987': 28, 'stock': 1, 'broker': 6, 'seeking': 16, 'restore': 3, 'empire': 16, 'manufacturer': 3, 'initially': 3, 'inadvertently': 2, 'saves': 15, 'thousands': 8, 'jews': 20, 'certain': 14, 'death': 75, 'holocaust': 21, 'west': 27, 'brooks': 22, 'robert': 199, 'de': 62, 'niro': 48, 'boxer': 71, 'martin': 69, 'scorsese': 47, 'matt': 55, 'damon': 57, 'highly': 8, 'popular': 100, 'espionage': 2, 'trials': 6, 'effects': 10, 'civil': 42, 'rich': 38, 'southern': 16, 'family': 218, 'russell': 65, 'brand': 24, 'rocker': 5, 'aldous': 6, 'snow': 14, 'teen': 35, 'whose': 64, 'halloween': 8, 'night': 49, 'ruined': 3, 'brother': 61, 'missing': 17, 'contains': 22, 'moment': 3, 'deniro': 32, 'looking': 51, 'mirror': 8, 'asking': 7, 'me': 42, 'carell': 35, 'dated': 2, 'awhile': 1, 'learning': 14, 'how': 72, 'pick': 7, 'girls': 29, 'cyberpunk': 1, 'action': 262, 'carpenter': 11, 'kurt': 8, 'snake': 4, 'plissken': 2, '1988': 32, 'nypd': 3, 'officer': 26, 'mcclane': 5, 'jimmy': 23, 'stewart': 57, 'wholesome': 1, 'involved': 22, 'corrupt': 12, 'politicians': 3, 'often': 15, 'referred': 2, 'female': 37, 'companion': 8, 'piece': 17, 'hangover': 5, 'competing': 6, 'determine': 1, 'maid': 11, 'honor': 8, 'glass': 7, 'trophy': 1, 'race': 33, 'winners': 1, 'picture': 73, 'colin': 19, 'firth': 11, 'academy': 91, 'best': 212, 'historical': 19, 'drama': 279, 'yasujiro': 1, 'ozu': 1, 'directs': 6, 'spare': 1, 'emotionally': 2, 'devastating': 1, 'coming': 22, 'visit': 5, 'japan': 9, '1948': 11, 'judy': 20, 'garland': 21, 'fred': 20, 'astaire': 10, 'dancing': 28, 'team': 76, 'drift': 2, 'apart': 13, 'new': 179, 'partners': 6, 'seen': 6, 'exposed': 3, 'chemical': 4, 'spill': 1, 'essentially': 1, 'turning': 6, 'zombies': 15, 'inch': 2, 'living': 
56, '14': 4, 'year': 58, 'home': 91, 'tiger': 17, 'get': 141, 'shipwrecked': 4, 'stranded': 6, 'boat': 23, 'models': 3, 'hamlet': 3, 'lions': 8, 'african': 24, 'sahara': 1, 'soft': 2, 'spoken': 3, 'writer': 25, 'completes': 1, 'consider': 3, 'be': 221, 'another': 45, 'title': 112, 'might': 10, 'cold': 12, 'blood': 5, 'author': 18, 'completing': 2, '90': 33, 'blew': 1, 'careers': 2, 'keanu': 33, 'reeves': 38, 'sandra': 11, 'bullock': 9, 'reindeer': 2, 'north': 15, 'pole': 9, 'teased': 3, 'because': 28, 'color': 10, 'nose': 8, 'humphrey': 30, 'bogart': 37, 'considered': 21, 'ever': 28, 'introduced': 13, 'us': 31, 'phrase': 15, 'here': 6, 'kid': 32, '2006': 24, 'awards': 29, 'documentary': 64, 'winner': 18, 'arctic': 9, 'yearly': 1, 'migration': 2, 'grossed': 1, '5': 10, 'nominees': 1, 'sisters': 17, '1800': 6, 'sigourney': 26, 'weaver': 28, 'ridley': 33, 'scott': 48, 'spawned': 22, 'sequels': 15, 'bear': 43, 'pig': 19, '1954': 24, 'stanley': 55, 'donen': 2, 'focused': 9, 'courting': 1, 'females': 2, 'related': 2, 'males': 1, 'itself': 8, 'jack': 119, 'black': 140, 'klumsy': 2, 'lovable': 11, 'miyazaki': 10, '1968': 34, 'sparked': 1, 'zombie': 19, 'craze': 4, 'rabbit': 23, 'kangaroo': 3, 'tigger': 1, 'hitmen': 3, 'gangsters': 12, 'wife': 107, 'intertwine': 2, 'throughout': 6, 'various': 14, 'parts': 12, 'played': 138, 'geena': 4, 'davis': 16, 'susan': 18, 'sarandon': 16, 'take': 84, 'thunderbird': 3, 'drew': 10, 'barrymore': 9, 'justin': 21, 'long': 49, 'relationship': 52, 'even': 22, 'though': 13, 'live': 61, 'opposite': 5, 'coasts': 1, 'grumpy': 7, 'semi': 2, 'bald': 2, 'relives': 3, 'masterpiece': 30, 'cameron': 62, 'crashing': 4, 'iceberg': 9, 'police': 51, 'investigating': 6, 'asylum': 6, 'it': 300, 'seemed': 1, 'friendly': 4, 'adult': 22, 'breakout': 5, 'role': 104, 'melissa': 4, 'mccarthy': 4, 'tomboy': 1, 'ish': 1, 'sexed': 1, 'crazed': 7, 'groom': 2, 'melville': 2, 'monomaniacal': 1, 'sea': 18, 'hunting': 14, 'giant': 45, 'white': 99, 'whale': 12, 'ang': 22, 
'nominated': 31, 'foreign': 23, 'language': 17, 'christopher': 31, 'reeve': 8, 'playwright': 10, 'finds': 100, 'way': 69, 'past': 36, 'loves': 15, 'coen': 35, 'moving': 10, 'california': 18, 'producers': 4, '70': 29, 'sylvester': 30, 'stallone': 41, 'philadelphia': 6, 'hal': 15, '9000': 5, 'kubrick': 43, 'him': 178, 'arthur': 17, 'c': 29, 'clarke': 2, '1989': 11, 'close': 12, 'circle': 6, 'shirley': 17, 'maclaine': 9, 'olympia': 2, 'dukakis': 2, 'french': 49, 'jacques': 1, 'yves': 2, 'cousteau': 1, 'louis': 5, 'malle': 1, 'exploring': 1, 'underwater': 2, 'depths': 2, 'oceans': 2, 'shakespeare': 58, 'married': 45, 'kills': 24, 'chinese': 10, 'hong': 8, 'kong': 9, 'triad': 1, 'departed': 2, 'machines': 8, 'believe': 6, 'ryan': 66, 'reynolds': 28, 'test': 10, 'pilot': 17, 'given': 14, 'alien': 95, 'ring': 21, 'gross': 2, 'early': 31, 'lord': 24, 'rings': 13, 'genius': 9, 'peter': 72, 'jackson': 44, 'creates': 5, 'theme': 13, 'park': 13, 'dinosaurs': 8, 'loose': 6, 'downey': 25, 'jr': 38, 'shipped': 2, 'theaters': 5, 'under': 38, 'code': 5, 'maternity': 1, 'recent': 51, 'sience': 1, 'edgerton': 4, 'lead': 41, 'male': 31, 'astronauts': 4, 'crash': 26, 'earth': 70, 'planet': 47, 'humans': 36, 'caged': 1, 'treated': 1, 'zoo': 16, 'animals': 53, 'singleton': 9, 'ice': 14, 'cube': 4, 'cuba': 11, 'gooding': 9, 'really': 34, 'hair': 15, 'stuck': 16, 'high': 89, 'tower': 17, 'lady': 13, 'keeps': 4, 'remade': 30, 'william': 47, 'friedkin': 2, '1977': 23, 'sorcerer': 12, '1953': 13, 'depicts': 52, 'men': 88, 'trying': 157, 'drive': 6, 'truck': 3, 'load': 2, 'explosives': 2, 'across': 35, 'dangerous': 17, 'terrain': 1, 'need': 38, 'says': 19, 'or': 49, 'not': 87, 'armstrong': 2, 'half': 7, 'shots': 1, 'barbra': 14, 'streisand': 23, 'drag': 8, 'ultimate': 7, 'leading': 29, 'soap': 7, 'opera': 12, 'childrens': 5, 'beloved': 12, 'growing': 13, 'pixar': 80, 'animation': 43, 'robot': 56, 'ends': 45, 'future': 82, 'cop': 56, 'crawling': 1, 'building': 19, 'terrorists': 10, 'comes': 49, 
'metal': 12, 'ultimatum': 2, 'beat': 11, 'private': 21, 'late': 22, 'nineties': 2, 'involving': 36, 'steal': 21, 'great': 47, 'lois': 2, 'wes': 26, 'anderson': 15, 'adaption': 7, 'roald': 7, 'dahl': 7, 'voices': 38, 'george': 107, 'clooney': 22, 'meryl': 18, 'streep': 19, 'hemsworth': 14, 'dan': 7, 'bradley': 16, 'danny': 25, 'trejo': 5, 'ex': 43, 'federale': 1, 'seek': 15, 'hired': 22, 'assassinate': 5, 'important': 4, 'government': 18, 'official': 2, 'invaded': 1, 'local': 13, 'teenagers': 35, 'natalie': 35, 'wood': 16, 'rita': 3, 'moreno': 3, 'broadway': 22, 'lights': 5, 'owen': 21, 'wilson': 17, 'blonde': 11, 'barbara': 15, '1920': 12, 'fanny': 4, 'brice': 4, 'preteen': 1, 'changed': 10, 'befriending': 3, 'school': 108, 'date': 9, '1998': 33, 'jeff': 27, 'bridges': 19, 'goodman': 8, 'dude': 12, 'mistaken': 8, 'millionaire': 7, 'seeks': 15, 'restitution': 1, 'rug': 2, 'nicholson': 46, 'crazy': 33, 'than': 24, 'usual': 2, 'mental': 18, 'institution': 8, 'nicholas': 36, 'cage': 57, 'rides': 8, 'fire': 9, 'head': 32, 'mad': 18, 'think': 26, 'comic': 57, 'coach': 13, 'assistant': 13, 'using': 14, 'reject': 1, 'players': 9, 'form': 8, 'dorothy': 16, 'dog': 63, 'toto': 7, 'tin': 7, 'cowardly': 2, 'lion': 31, 'scarecrow': 9, 'journey': 35, 'yellow': 20, 'brick': 12, 'road': 35, 'eccentric': 10, 'musician': 7, 'much': 19, 'anticipated': 1, 'shot': 30, 'bunch': 22, 'guys': 43, 'went': 9, 'thailand': 7, 'worst': 8, 'oliver': 21, 'megaton': 1, 'followed': 9, 'fallout': 1, 'bryan': 7, 'mills': 6, 'kidnapped': 26, 'tell': 24, 'russel': 20, 'crow': 4, 'smart': 6, 'deaf': 4, 'baby': 34, 'do': 113, 'know': 45, 'sparks': 11, 'turned': 52, 'miley': 4, 'cyrus': 4, 'fiance': 7, 'liam': 49, 'scorcese': 5, 'lamotta': 10, 'clive': 8, 'barker': 2, 'shows': 53, 'urban': 5, 'legend': 16, 'brought': 24, 'saying': 4, 'ben': 41, 'stiller': 19, 'leader': 25, 'employees': 9, 'cheated': 1, 'money': 30, 'crooked': 1, 'businessman': 15, '1951': 10, 'morley': 1, 'katharine': 8, 'hepburn': 38, 
'samuel': 21, 'rose': 11, 'saye': 1, 'reporter': 12, 'keep': 27, 'remarrying': 2, 'country': 48, 'playful': 3, 'romantic': 200, 'sex': 17, 'therapist': 8, 'counsels': 1, 'put': 30, 'excitement': 3, 'marriage': 12, 'ron': 21, 'howard': 27, 'lucas': 30, 'evil': 91, 'queen': 19, 'arrests': 1, 'pregnant': 12, 'montgomery': 1, 'clift': 1, 'murders': 17, 'shelley': 4, 'winters': 2, 'desperately': 3, 'elizabeth': 22, 'taylor': 20, 'stevens': 2, 'holly': 17, 'hunter': 32, 'kidnappers': 2, 'dominic': 5, 'toretto': 2, 'crew': 29, 'plan': 12, 'massive': 8, 'secure': 3, 'franchise': 46, 'teenager': 25, 'york': 75, 'monster': 36, 'been': 50, 'hesitant': 1, 'reprise': 4, 'ripley': 6, 'rejected': 1, 'numerous': 7, 'offers': 3, 'fox': 27, 'studios': 10, 'any': 8, 'fearing': 3, 'would': 43, 'poorly': 1, 'sub': 2, 'par': 1, 'could': 19, 'hurt': 6, 'legacy': 2, 'however': 2, 'so': 37, 'impressed': 2, 'quality': 3, 'script': 5, 'finally': 8, 'agreed': 2, 'archeologist': 3, 'adventurer': 5, 'something': 20, 'hand': 19, 'mid': 10, 'knights': 14, 'portman': 29, 'revolves': 13, 'murder': 72, 'dakota': 4, 'give': 19, 'finding': 21, 'drug': 53, 'change': 16, 'whole': 7, 'must': 97, 'away': 36, 'ruthless': 5, 'thugs': 4, 'documents': 5, 'disfigured': 5, 'famously': 10, 'yells': 2, 'peacefully': 1, 'destroyed': 6, 'danger': 7, 'planets': 1, 'woods': 15, 'lives': 74, 'katherine': 11, 'missionary': 3, 'convinces': 4, 'drunken': 6, 'dilapidated': 1, 'steamboat': 1, 'help': 73, 'torpedo': 1, 'german': 36, '60': 14, 'cult': 50, 'romero': 9, 'attempt': 28, 'survive': 20, 'low': 6, 'brow': 2, 'jealous': 8, 'friend': 69, '2003': 20, 'search': 25, 'several': 35, 'including': 40, 'peanuts': 4, 'gang': 24, 'holiday': 47, 'season': 4, 'kristen': 35, 'bell': 18, 'josh': 12, 'duhamel': 3, 'opposites': 3, 'european': 3, 'lawman': 3, 'wild': 24, 'brings': 14, 'outlaws': 7, '1990': 58, 'savini': 1, 'remake': 62, '1984': 50, 'milos': 5, 'foreman': 3, 'f': 9, 'murray': 24, 'abraham': 6, 'louise': 3, 'fletcher': 
3, '1975': 28, 'scientists': 7, 'attacked': 17, 'creature': 27, 'mimic': 1, 'thing': 4, 'squares': 2, 'against': 100, 'alan': 7, 'rickman': 2, 'kids': 47, 'play': 141, 'board': 9, 'found': 19, 'release': 11, 'trapped': 29, 'decades': 4, 'host': 7, 'dangers': 4, 'stopped': 5, 'finishing': 1, 'plot': 73, 'robin': 15, 'williams': 16, 'david': 52, 'naughton': 1, 'u': 22, 'bitten': 3, 'mythical': 11, 'beast': 16, 'backpacks': 1, 'abroad': 2, '3': 53, 'rd': 5, 'selling': 8, 'books': 18, 'british': 84, 'k': 22, 'rowling': 9, 'brad': 59, 'pitt': 57, 'anthony': 27, 'hopkins': 19, 'father': 124, 'remote': 7, 'wilderness': 8, '1900': 6, 'transported': 18, 'nature': 10, 'separated': 9, 'its': 61, 'then': 39, 'adopted': 7, 'transplanted': 1, 'mars': 9, 'vet': 4, 'discovers': 32, 'lush': 1, 'inhabited': 5, '12': 4, 'foot': 3, 'tall': 6, 'barbarians': 1, 'rated': 7, 'spoofs': 1, 'flicks': 2, 'sparkling': 1, 'heart': 19, 'throbs': 1, 'werewolves': 1, '1970': 34, 'altman': 7, 'successful': 16, 'television': 33, 'space': 52, 'latifah': 7, 'dolly': 6, 'parton': 6, 'leaders': 2, 'church': 7, 'choir': 1, 'tyler': 18, 'perry': 15, 'big': 71, 'exported': 1, 'protect': 25, 'herself': 18, 'surely': 3, '1980': 84, 'zucker': 2, 'abrahams': 1, 'if': 38, 'lacking': 1, 'edgar': 7, 'rice': 2, 'burroughs': 2, 'short': 23, 'goldie': 3, 'hawn': 3, 'bette': 14, 'midler': 5, 'diane': 15, 'keaton': 10, '1996': 27, 'divorce': 6, 'strip': 7, 'large': 27, 'dane': 2, 'girlfriend': 19, 'villianous': 1, 'boyfriends': 4, 'order': 45, 'soldier': 33, 'clint': 55, 'eastwood': 59, 'sean': 30, 'tim': 63, 'robins': 1, 'stop': 50, 'nothing': 8, 'catch': 15, 'third': 52, 'installment': 52, 'continues': 4, 'intertwined': 5, 'stories': 17, 'andy': 11, 'woody': 40, 'buzz': 12, 'jackie': 12, 'chan': 9, 'agent': 72, 'babysit': 3, 'eddie': 18, 'murphy': 18, 'doctor': 15, 'talk': 18, 'second': 63, 'trilogy': 49, 'mockumentary': 7, 'loudest': 1, 'band': 28, 'tour': 12, 'evans': 7, 'ready': 6, 'sock': 1, 'ol': 2, 'adolf': 3, 
'jaw': 2, 'marvel': 29, 'those': 16, 'spotted': 1, 'dogs': 9, 'running': 27, 'senator': 3, 'sense': 2, 'justice': 5, 'clashes': 2, 'washington': 47, 'system': 5, 'ingrid': 4, 'bergman': 10, 'reunite': 7, 'africa': 16, 'interlude': 1, 'judd': 6, 'apatow': 4, 'rudd': 32, 'knocked': 5, 'zach': 21, 'galifianakis': 15, 'soon': 6, 'having': 26, 'hitch': 2, 'ride': 11, 'aspiring': 9, 'make': 73, 'birth': 13, 'child': 63, 'nuremberg': 2, 'rally': 2, 'leni': 1, 'riefenstahl': 1, '1940': 25, 'adapted': 38, '1950': 21, '000': 5, 'performances': 7, 'violent': 24, 'fincer': 1, 'edward': 26, 'norton': 20, 'mysterious': 31, 'pair': 17, 'infertile': 1, 'decide': 22, 'kidnap': 7, 'andrew': 10, 'garfield': 6, 'our': 14, 'web': 2, 'slinging': 1, 'hero': 77, 'fedora': 1, 'wearing': 14, 'professor': 21, 'max': 7, 'lumet': 3, 'al': 43, 'pacino': 37, 'few': 11, 'cops': 23, 'orphan': 13, 'train': 25, 'station': 13, 'marlon': 27, 'brando': 29, 'talking': 38, 'contender': 3, 'premise': 4, 'mermaid': 6, 'makes': 34, 'faustian': 1, 'bargain': 2, 'mean': 5, 'seahag': 1, 'meet': 20, 'rises': 4, 'avenge': 11, 'parents': 36, 'roman': 26, 'polanski': 18, 'x': 7, 'crumb': 2, 'redford': 32, 'version': 34, 'neil': 13, 'simon': 9, 'outrageous': 2, 'president': 23, 'vampires': 17, 'fantasy': 107, 'garnered': 18, 'nomination': 9, 'direcor': 1, 'allen': 40, '2001': 33, 'league': 7, '1937': 11, 'jean': 17, 'renoir': 9, 'frequently': 6, 'films': 49, 'collection': 3, 'music': 48, 'done': 5, 'tv': 33, 'show': 75, 'club': 14, 'indiana': 14, 'harrison': 47, 'ford': 64, 'track': 26, 'recover': 6, 'legendary': 19, 'biblical': 8, 'bernard': 6, 'malamud': 2, '1952': 9, 'eats': 6, 'poisoned': 2, 'apple': 5, 'fairest': 3, 'benedict': 2, 'slade': 1, 'receives': 5, 'spirits': 11, 'channing': 35, 'tatum': 32, 'adrien': 7, 'brody': 10, 'warsaw': 4, 'ghetto': 7, 'whitney': 1, 'houston': 1, 'jordin': 1, 'steinbeck': 4, 'kate': 31, 'beckinsale': 4, 'returning': 8, 'selene': 2, 'fourth': 17, 'nurse': 11, 'tends': 3, 
'badly': 3, 'burned': 5, 'plane': 25, 'victim': 15, '9': 2, 'now': 19, 'shark': 43, 'benchley': 2, 'forbidden': 7, 'secretive': 2, 'cowboys': 7, 'years': 38, 'huge': 8, 'hit': 87, 'blue': 39, 'costumes': 3, '2009': 40, 'records': 3, 'happening': 1, '1942': 13, 'errol': 7, 'flynn': 5, 'portrays': 19, 'real': 72, 'boxing': 22, 'champion': 9, '1971': 22, 'ultra': 5, 'produced': 51, 'burgess': 5, '1962': 25, 'novella': 5, 'richard': 51, 'dreyfuss': 14, 'sees': 15, 'sky': 10, 'compelled': 3, 'devil': 11, 'wyoming': 1, 'tales': 4, 'bad': 48, 'recreates': 1, 'image': 6, 'abolisher': 1, 'bloodsucking': 1, 'monsters': 13, 'bus': 16, 'drivers': 1, 'stay': 17, 'above': 3, '50': 17, 'mph': 3, 'exploding': 3, 'stephen': 26, 'king': 70, 'wrongfully': 2, 'imprisoned': 7, 'yet': 10, 'patiently': 1, 'awaits': 2, 'chance': 15, 'sci': 89, 'fi': 89, 'kingsley': 5, 'indian': 10, 'lawyer': 21, 'activist': 6, 'pacifist': 3, 'prize': 6, 'fighter': 10, 'longshoreman': 4, 'struggles': 26, 'stand': 7, 'union': 13, 'bosses': 8, 'unemployed': 7, 'reputation': 5, 'difficult': 6, 'disguises': 3, 'himself': 57, 'sick': 4, 'wish': 6, 'fountain': 8, 'rome': 12, 'wedding': 15, 'steel': 7, 'accepted': 2, 'peers': 1, 'santa': 24, 'clause': 6, 'hemingway': 3, 'terrence': 2, 'military': 23, 'personnel': 3, 'frog': 3, 'magical': 47, 'spell': 17, 'marion': 2, 'crane': 3, 'room': 2, 'motel': 10, 'crowe': 42, 'mathematician': 7, 'issues': 6, 'describes': 2, 'theory': 4, 'ignoring': 1, 'con': 18, 'settle': 4, 'score': 10, 'mob': 28, 'boss': 23, 'controversial': 15, 'well': 42, 'received': 15, 'homosexuality': 1, 'wayne': 29, 'sons': 3, 'avenging': 3, 'honoring': 1, 'memory': 11, 'mother': 54, 'themselves': 27, 'side': 16, 'kidnapping': 1, 'kurosawa': 13, 'relays': 1, 'independent': 11, 'listener': 1, 'audience': 4, 'showing': 9, 'multiple': 20, 'versions': 7, 'truth': 5, 'adam': 47, 'sandler': 39, 'got': 15, 'teacher': 8, 'tracks': 4, 'clones': 1, 'cheesy': 1, 'titans': 3, 'owl': 4, '2008': 21, 'humorous': 
3, 'postmodern': 1, 'genre': 15, 'neighboring': 2, 'gardens': 2, 'montague': 1, 'capulet': 1, 'preparing': 3, 'appearance': 6, 'slasher': 20, 'mask': 15, 'upon': 25, 'shatner': 2, 'likeness': 1, 'line': 40, 'smell': 2, 'napalm': 2, 'morning': 4, 'sheen': 12, 'bright': 2, 'computer': 43, 'devito': 4, 'zac': 9, 'efron': 9, 'swift': 5, 'depicting': 10, 'inhabitants': 8, 'thneedville': 1, 'craven': 19, 'defending': 4, 'invaders': 5, '1994': 32, 'uma': 14, 'thurman': 12, 'hitman': 18, 'bandits': 11, 'cher': 7, 'javier': 6, 'bardem': 6, 'roy': 8, 'scheider': 6, 'seven': 21, 'connected': 2, 'stalked': 9, 'totalitarian': 2, 'society': 16, 'neurotic': 2, 'bug': 2, 'adventurous': 7, 'archaeologist': 4, 'searches': 9, 'egypt': 1, 'ancient': 15, 'relic': 2, '1993': 34, 'mountain': 8, 'climber': 4, 'lithgow': 3, 'wizard': 36, 'look': 21, 'destroy': 20, 'charms': 1, 'contain': 1, 'soul': 9, 'magician': 9, 'alongside': 8, 'actresses': 3, 'sally': 6, 'field': 14, 'macclaine': 1, 'helping': 9, 'carrell': 13, 'adopts': 4, 'g': 6, 'robinson': 9, 'became': 20, 'rico': 3, 'bandello': 1, '1931': 12, 'emma': 30, 'stone': 46, 'gosling': 15, 'embarrassing': 1, 'impediment': 7, 'full': 22, '1957': 16, 'hardened': 1, 'general': 21, 'egged': 1, 'ambitious': 7, 'works': 29, 'fulfill': 6, 'prophecy': 1, 'spider': 6, 'castle': 8, 'weird': 11, 'indie': 4, 'screenwriter': 5, 'case': 18, 'block': 2, 'may': 16, 'next': 16, 'political': 21, 'co': 31, 'neeson': 46, 'claiming': 1, 'institutionalized': 2, 'represented': 1, 'court': 5, 'farce': 1, 'dopey': 3, 'teens': 8, 'phenomena': 1, 'twilight': 6, 'bounty': 18, 'rooster': 5, 'cogburn': 5, 'reunited': 9, 'circumstance': 1, 'loses': 17, 'childhood': 10, 'tragedy': 13, 'overshadowed': 1, 'looses': 3, 'grip': 1, 'reality': 15, 'hockey': 14, 'player': 31, 'control': 21, 'antics': 6, 'creation': 11, 'athletes': 4, 'determined': 11, 'jew': 3, 'devout': 2, 'christian': 18, '1924': 3, 'olympics': 4, 'chameleon': 10, 'winds': 4, 'dirt': 5, 'pete': 2, 'yates': 
2, 'fast': 12, 'paced': 4, 'grandfather': 6, 'shakespere': 1, 'pretends': 9, 'slut': 3, 'gain': 7, 'popularity': 1, 'kevin': 81, 'bacon': 10, 'butts': 1, 'heads': 7, 'allow': 2, 'listen': 2, 'roll': 11, 'updated': 5, 'fairy': 26, 'orleans': 7, 'reptile': 1, 'again': 27, 'snyder': 7, 'armor': 2, 'clad': 3, 'owls': 2, 'battle': 42, 'guards': 5, 'row': 7, 'affected': 1, 'charges': 1, 'accused': 24, 'rape': 8, 'gift': 7, 'unusual': 9, 'scout': 12, 'floating': 1, 'newspaper': 10, 'paper': 1, 'sometimes': 3, 'touted': 1, 'falls': 98, 'turns': 32, 'called': 58, 'psychosomatically': 1, 'dumb': 4, 'blind': 6, 'becomes': 56, 'master': 14, 'pinball': 2, '1961': 15, 'pool': 5, 'tear': 6, 'jerker': 3, 'defense': 4, 'undeserved': 1, 'charge': 7, 'final': 40, 'showdown': 3, 'face': 37, 'mount': 8, 'rushmore': 2, 'overcomes': 3, 'captured': 14, 'ken': 4, '1986': 20, 'mystery': 23, 'centers': 55, 'student': 43, 'visiting': 3, 'hospital': 16, 'ear': 5, 'hometown': 4, 'lumberton': 1, 'lucy': 5, 'walker': 6, 'garbage': 1, 'pickers': 1, 'franco': 15, 'zeffirelli': 3, 'crossed': 21, 'lovers': 38, 'families': 22, 'd': 48, 'dreamworks': 27, 'sanders': 2, 'dean': 10, 'deblois': 1, 'association': 1, 'vampire': 47, 'tod': 2, 'browning': 3, 'bela': 1, 'lugosi': 1, 'farm': 18, 'bullied': 10, 'misunderstood': 1, 'ferrel': 3, 'mark': 50, 'walhberg': 1, 'goofy': 4, 'mess': 5, 'johnny': 93, 'depp': 90, 'gentle': 2, 'winona': 7, 'ryder': 9, 'burton': 36, 'favorite': 17, 'skellington': 5, 'misguided': 1, 'transform': 4, 'halloweentown': 1, 'quite': 6, 'farrelly': 3, 'update': 4, 'program': 10, 'concerns': 11, 'clumsy': 4, '6': 4, 'th': 29, 'highest': 11, 'grossing': 9, 'domesticating': 1, 'learned': 3, 'appreciate': 2, 'once': 12, 'fearsome': 1, 'ponders': 1, 'question': 2, 'slyvester': 4, 'heavyweight': 4, 'warren': 13, 'beatty': 17, 'youth': 9, 'intelligent': 12, 'h': 8, 'l': 30, 'superhero': 85, 'style': 5, 'beginning': 11, 'organizations': 4, 'supermutants': 1, 'coast': 2, 'connecticut': 1, 
'holden': 2, 'vied': 1, 'affections': 3, 'audrey': 24, 'wilder': 30, 'mexican': 7, 'alfonso': 2, 'cuar': 2, 'modern': 43, 'henry': 21, 'entered': 1, 'witness': 7, 'protection': 6, 'unfulfilling': 1, 'depressing': 1, 'lifestyle': 4, 'solace': 1, 'starting': 6, 'fights': 22, 'underground': 8, 'slums': 2, 'brazil': 5, 'michael': 79, 'crichton': 1, 'masked': 13, 'horrifying': 2, 'my': 27, 'lewis': 27, 'wiig': 23, 'parody': 10, 'macguyver': 1, 'federico': 4, 'fellini': 7, 'marcello': 2, 'mastroianni': 2, 'anita': 1, 'ekberg': 1, 'biographical': 26, 'nash': 7, 'brilliant': 12, 'portrayed': 8, 'wolves': 19, 'oil': 9, 'rig': 1, 'workers': 13, 'alaskan': 3, 'setting': 12, 'september': 4, 'aftermath': 2, 'five': 16, 'chosen': 8, 'eliminate': 5, 'ones': 7, 'responsible': 6, 'fateful': 3, 'travels': 29, 'entirely': 8, 'computers': 1, 'household': 6, 'reprises': 3, 'cartoon': 39, 'hunted': 6, 'according': 5, 'lesbian': 3, 'throws': 1, 'turmoil': 2, 'exclusive': 3, 'golf': 6, 'course': 9, 'deal': 24, 'brash': 2, 'member': 16, 'destructive': 3, 'gopher': 4, 'remarkable': 1, 'narrated': 4, 'morgan': 38, 'freeman': 35, 'kubric': 6, 'ghost': 7, 'rider': 6, 'survivors': 5, 'locked': 3, 'mall': 3, 'undead': 5, '1974': 20, 'lucille': 2, 'ball': 5, 'bea': 1, '2004': 22, 'terrance': 1, 'bolluck': 1, 'focuses': 30, 'racial': 4, 'tension': 1, 'los': 15, 'angeles': 12, 'saga': 9, 'good': 36, 'western': 83, '7': 6, 'gunman': 3, 'protecting': 5, 'village': 13, 'donkey': 10, 'green': 28, 'grandmother': 2, 'wolf': 7, 'stalking': 2, 'jamie': 28, 'curtis': 26, 'babysitter': 4, 'psycho': 2, 'older': 10, 'downing': 1, 'forced': 35, 'trip': 38, 'stranger': 5, 'arrival': 3, 'ii': 49, 'parisian': 2, 'actress': 41, 'hide': 6, 'jewish': 19, 'husband': 28, 'meek': 2, 'hobbit': 4, 'shire': 2, 'eight': 3, 'companions': 3, 'sauron': 4, 'loser': 7, 'fortune': 4, 'changes': 6, 'europe': 3, 'wants': 45, 'hemworth': 1, 'titular': 20, 'orwell': 1, 'parodies': 7, 'communism': 1, 'pet': 12, 'courage': 4, 
'prequel': 16, '21': 1, 'st': 7, 'century': 14, 'rat': 4, 'attempts': 15, 'beautiful': 21, 'fonda': 15, 'temple': 1, 'indians': 7, 'southwest': 1, 'francis': 19, 'mcdormand': 7, 'macy': 6, 'minnesota': 2, 'accents': 1, 'patrick': 11, 'harris': 9, 'pesky': 1, 'robbers': 6, 'stray': 6, 'sets': 24, 'japanese': 42, 'camp': 30, 'prisoners': 5, 'revealing': 3, 'tech': 3, 'runs': 17, 'gorilla': 9, 'damsel': 2, 'hostage': 10, 'climbs': 3, 'buildings': 2, 'brendan': 4, 'fraser': 4, 'rachel': 27, 'weisz': 8, 'treasure': 10, 'hamunaptra': 2, 'anime': 7, 'scientist': 17, 'dynamic': 2, 'trio': 15, 'spacey': 22, 'examination': 2, 'mans': 3, 'rivalry': 1, 'sharks': 7, 'jets': 6, 'christie': 5, 'bow': 4, 'wow': 2, 'wins': 16, 'raffle': 1, 'neighborhood': 9, 'flick': 41, 'propelled': 1, 'englund': 1, 'stardom': 4, 'deformed': 6, 'hope': 7, 'never': 25, 'dream': 26, 'superiors': 6, 'hosting': 1, 'dinner': 8, 'celebrating': 2, 'idiocy': 2, 'guests': 4, 'rising': 5, 'executive': 11, 'questions': 6, 'invited': 6, 'guest': 3, 'direction': 5, 'cubric': 1, 'claims': 5, 'acting': 8, 'fame': 7, 'scottish': 11, 'highlands': 1, 'anne': 35, 'hathaway': 25, 'learns': 29, 'heir': 5, 'throne': 7, 'genovia': 2, 'charles': 37, 'dickens': 20, 'lightyear': 7, 'agree': 1, 'accompany': 4, 'submarine': 2, 'travel': 36, 'pepperland': 2, 'hating': 2, 'meanies': 2, 'spooky': 1, 'benicio': 4, 'del': 14, 'toro': 13, 'lawrence': 25, 'talbot': 1, 'aha': 1, 'twisted': 2, 'ballet': 11, 'swan': 3, 'lake': 12, 'havoc': 5, 'arcade': 5, 'cowboy': 7, 'spy': 37, 'length': 5, 'walt': 14, 'wahlburg': 2, 'retired': 20, 'criminal': 27, 'law': 30, 'debts': 2, 'dealers': 3, 'nicolas': 28, 'knight': 7, 'charged': 5, 'task': 4, 'escorting': 1, 'monastery': 2, 'part': 39, 'vehicles': 3, 'personality': 2, 'innocent': 3, 'jail': 11, 'escapes': 12, 'always': 20, 'dreamt': 1, 'fully': 2, 'cgi': 7, 'going': 34, 'rainbow': 1, 'along': 18, 'ralph': 15, 'fiennes': 12, 'gweneth': 1, 'paltrow': 11, 'period': 12, 'fictionalization': 1, 
'fishing': 1, 'held': 5, 'freak': 3, 'goldblum': 3, 'ian': 5, 'malcolm': 8, 'romeo': 9, 'juliet': 9, 'concerned': 5, 'efforts': 8, 'phone': 6, 'god': 26, 'facing': 9, 'loki': 6, 'asgard': 4, 'ingmar': 6, 'von': 7, 'sydow': 2, 'chess': 2, 'telling': 14, 'surgeons': 1, 'korean': 6, 'shares': 5, 'available': 1, 'infamous': 14, 'cruella': 2, 'glenn': 5, 'attenborough': 3, 'failed': 8, 'operation': 6, 'market': 7, 'garden': 9, 'talked': 3, 'attack': 18, 'enemy': 16, 'warship': 1, 'wwi': 4, 'bing': 6, 'crosby': 6, 'elephant': 13, 'jennifer': 55, 'aniston': 28, 'gerard': 14, 'butler': 17, 'centered': 43, 'retrieve': 9, 'cure': 2, 'mutation': 1, 'margaret': 6, 'mitchell': 7, 'surviving': 5, 'leslie': 8, 'nielsen': 6, 'jet': 10, 'everything': 7, 'food': 16, 'rains': 3, 'rain': 7, 'witnesses': 8, 'apartment': 13, 'andrews': 15, 'nanny': 9, 'flying': 13, '1965': 19, 'russian': 13, 'poet': 4, 'omar': 6, 'sharif': 5, 'geraldine': 1, 'chaplin': 10, 'shocking': 3, 'water': 22, 'filthiest': 1, 'extremely': 7, 'alike': 3, 'martha': 2, 'vineyard': 2, 'gold': 15, 'fever': 2, 'huston': 12, 'someone': 19, 'dies': 17, 'causing': 4, 'secrets': 11, 'philip': 9, 'androids': 7, 'electric': 4, 'sheep': 9, 'capturing': 2, 'plague': 7, 'soldiers': 25, 'traveling': 39, 'countryside': 3, 'toy': 7, 'bay': 11, 'americans': 8, 'prospector': 2, 'wilds': 3, 'central': 5, 'mexico': 14, '1973': 22, 'linda': 10, 'blair': 7, 'demonic': 5, 'base': 5, 'path': 4, 'closet': 2, 'helen': 8, 'hunt': 16, 'mute': 9, 'pianist': 4, 'sent': 46, 'zealand': 4, 'greek': 21, 'mythology': 5, 'possession': 7, 'catholic': 1, 'defend': 10, 'naive': 5, 'bucked': 1, 'tooth': 3, 'break': 21, 'pornography': 1, 'industry': 6, '1941': 19, 'orson': 16, 'welles': 14, 'last': 49, 'words': 7, 'tycoon': 6, 'ability': 9, 'turn': 32, 'invisible': 8, 'burt': 12, 'jon': 19, 'voight': 5, 'ned': 7, 'ronnie': 1, 'cox': 6, 'river': 13, 'hotel': 26, 'winter': 12, 'spiritual': 5, 'presence': 3, 'influences': 2, 'violence': 16, 'psychic': 3, 
'horrific': 5, 'forebodings': 2, 'errant': 1, 'united': 19, 'states': 20, 'bomber': 1, 'drops': 1, 'nuclear': 16, 'warhead': 1, 'russia': 9, 'due': 13, 'schemes': 3, 'cooper': 23, 'ray': 22, 'liotta': 12, 'body': 13, 'puppetry': 1, 'performed': 1, '10': 11, 'stuntman': 4, 'kitchen': 2, 'were': 56, 'without': 18, 'legs': 4, 'expert': 6, 'walking': 3, 'hands': 17, 'eventually': 17, 'nabbed': 1, 'originally': 26, 'wrote': 20, 'mitchum': 2, 'courageous': 2, 'tracking': 7, '4': 14, 'hijackers': 1, 'staring': 41, 'kirsten': 1, 'soderbergh': 12, 'gina': 1, 'carano': 1, 'panda': 12, 'po': 4, 'voiced': 18, 'martial': 34, 'arts': 33, 'lopez': 4, 'alex': 15, 'o': 35, 'loughlin': 1, 'bosley': 1, 'carrey': 22, 'shaved': 1, 'hairline': 1, 'thought': 7, 'cerebral': 1, 'person': 25, 'childish': 1, 'eighteenth': 1, 'vienna': 7, 'composer': 11, 'drunk': 6, 'car': 34, 'threw': 2, 'andersen': 1, 'hackman': 20, 'schwartzman': 1, 'started': 7, 'menswear': 1, 'fashion': 5, 'trend': 1, 'wars': 5, 'flim': 2, 'skipped': 1, 'bail': 6, 'filled': 8, 'mercenaries': 3, 'thinks': 7, 'door': 10, 'denzel': 41, 'charm': 2, 'filmmaker': 5, 'fondly': 1, 'remembers': 2, 'boyhood': 1, 'fascination': 1, 'heath': 14, 'ledger': 14, 'stiles': 4, 'emotional': 5, 'twist': 11, 'dead': 38, 'candy': 9, 'reese': 17, 'pieces': 8, 'ensemble': 24, 'eve': 13, 'ashton': 10, 'kutcher': 9, 'mendes': 8, 'bon': 2, 'jovi': 2, 'among': 16, 'together': 58, 'leads': 23, 'rick': 4, 'deckard': 2, 'cyborg': 13, 'uttered': 1, 'phrases': 1, 'll': 8, 'dave': 7, 'lizewski': 1, 'unnoticed': 2, 'fan': 6, 'boris': 3, 'karloff': 3, '1935': 7, 'twins': 2, 'thanksgiving': 1, 'understudy': 1, 'betrayed': 8, 'margo': 3, 'hung': 1, 'lived': 9, 'louisa': 2, 'alcott': 1, '1860': 1, 'ethereal': 1, 'searching': 27, 'golden': 10, 'retriever': 2, 'rabies': 4, 'bite': 2, 'sharon': 3, 'joe': 18, 'pesci': 14, 'las': 9, 'vegas': 9, 'macbeth': 2, 'meg': 19, 'widowed': 5, 'horrible': 10, 'outlaw': 10, 'dolphin': 4, 'underlining': 1, 'eternal': 2, 
'connection': 6, 'warrior': 15, 'murdered': 23, 'rapunzel': 14, 'voice': 47, 'chuck': 5, 'exacted': 1, 'counselors': 4, 'allowed': 1, 'drown': 2, 'known': 54, 'anton': 5, 'corbijn': 4, 'exercise': 2, '1922': 3, 'expressionism': 1, 'w': 5, 'murnau': 2, 'unauthorized': 2, 'bram': 5, 'stoker': 4, 'dracula': 4, 'store': 10, 'avent': 2, 'booksellers': 1, 'production': 7, 'greenwood': 1, 'ms': 4, 'allied': 5, 'pows': 3, 'evacuate': 1, 'tipping': 1, 'hakunamatata': 1, 'samurai': 7, 'point': 7, 'view': 12, 'topher': 4, 'grace': 9, 'anna': 17, 'faris': 8, 'awkward': 6, 'capture': 9, 'attention': 4, 'crush': 3, '1947': 4, 'prominently': 4, 'department': 4, 'integral': 1, 'storyline': 1, '1978': 17, 'revisited': 1, 'zack': 9, 'comedic': 7, 'nia': 3, 'vardalos': 3, 'upcoming': 2, 'nuptials': 2, 'causes': 4, 'stress': 1, 'nobel': 2, 'laureate': 1, 'economics': 1, 'milton': 5, 'stapler': 3, 'threat': 7, 'burn': 1, '1946': 15, 'typically': 1, 'shown': 9, 'investigate': 3, 'virus': 10, 'killed': 39, 'mankind': 8, 'concluded': 2, 'frodo': 11, 'baggin': 1, 'vanquish': 1, 'frank': 28, 'baum': 3, 'far': 7, 'mcadams': 14, 'daniel': 55, 'craig': 36, 'futuristic': 30, 'roddy': 5, 'mcdowall': 4, '1943': 5, 'horse': 24, 'raise': 7, 'boyle': 5, 'convict': 6, 'terrorizing': 4, 'da': 2, 'henreid': 3, 'claude': 4, 'kathryn': 3, 'mccormick': 1, 'dance': 28, 'costner': 20, 'elliot': 5, 'ness': 4, 'connery': 13, 'capone': 6, 'mock': 1, 'rob': 34, 'reiner': 13, 'fictional': 21, 'heavy': 12, 'signorney': 1, 'betty': 5, 'missouri': 2, 'louisiana': 6, 'purchase': 1, 'exposition': 1, 'fair': 8, '1904': 4, 'prostitute': 9, 'plots': 5, 'presidential': 5, 'candidate': 5, 'akira': 10, 'built': 5, 'tellings': 1, 'savannah': 1, 'especially': 1, 'darren': 4, 'aronofsky': 5, 'descending': 1, 'madness': 5, 'pack': 9, 'savior': 5, 'nazi': 15, 'credited': 1, 'killing': 23, 'notorious': 3, 'actually': 20, 'reilly': 4, 'satire': 8, 'fans': 8, 'marx': 13, 'groucho': 3, 'liners': 1, 'seaside': 1, 'comrades': 1, 
'funny': 26, 'katie': 10, 'holmes': 5, 'dramatic': 22, 'effect': 1, 'bowie': 6, 'goblin': 3, 'henson': 10, 'fifth': 9, 'outwit': 2, 'swedish': 10, 'paying': 2, 'respects': 1, 'dying': 11, 'corresponds': 1, 'giving': 8, 'moses': 9, 'stage': 10, 'castaway': 2, 'pretty': 10, 'krackens': 1, 'showed': 7, 'medusa': 2, 'douglas': 9, 'financial': 8, 'district': 2, 'kline': 7, 'mismatched': 7, 'interests': 3, 'art': 7, 'reclusive': 1, 'artist': 12, 'puppet': 9, 'lies': 5, 'grow': 8, 'basketball': 23, 'championship': 6, 'sly': 9, 'teams': 9, 'aging': 27, 'merceneries': 2, 'tragic': 19, 'claire': 6, 'danes': 6, 'biopic': 22, 'founder': 3, 'facebook': 13, 'actual': 10, 'events': 26, 'prohibition': 6, 'dystopian': 17, 'debilitating': 1, 'stutter': 4, 'process': 6, 'overcome': 6, 'waters': 7, 'remain': 3, 'pure': 2, 'temptations': 1, 'previously': 6, 'still': 6, 'harm': 1, 'intended': 1, 'victims': 14, 'sleep': 13, 'translated': 1, 'means': 4, '1960': 33, 'journalist': 15, 'week': 6, 'longs': 4, 'convinced': 3, 'cheat': 8, 'entertainment': 5, 'happen': 9, 'spies': 3, 'carey': 5, 'escapee': 2, '1964': 15, 'fleming': 3, 'delves': 1, 'deep': 11, 'smuggling': 5, 'protagonist': 9, 'kyle': 4, 'machlachlan': 1, 'solve': 16, 'england': 22, 'interest': 15, 'literature': 3, 'harper': 10, 'saved': 15, 'employing': 2, 'factory': 12, 'scissors': 3, 'buys': 1, 'tennessee': 3, 'fragile': 1, 'blanche': 3, 'dubois': 2, 'moves': 10, 'watches': 1, 'disappear': 1, 'confederate': 5, 'includes': 27, 'aliens': 28, 'optimus': 2, 'prime': 10, 'bumblebee': 2, 'start': 22, 'gifted': 4, 'youngsters': 2, 'discovery': 8, 'onboard': 1, 'decided': 4, 'send': 7, 'angels': 2, 'humanity': 13, 'cafe': 2, 'rooney': 6, 'mara': 4, 'forty': 2, 'divorced': 10, 'husbands': 6, 'left': 23, 'younger': 11, 'timberlake': 13, 'amanda': 11, 'seyfried': 8, '25': 6, '1933': 5, 'ape': 6, 'state': 17, 'rampage': 3, 'zemeckis': 6, 'touchstone': 1, 'pictures': 10, 'alzheimer': 1, 'develops': 7, 'experimental': 5, 'chimpanzee': 1, 
'surreal': 2, 'italian': 27, 'decadent': 2, '300': 2, 'spartans': 1, 'times': 19, 'tina': 6, 'fey': 6, 'identity': 10, 'bored': 3, 'evening': 5, 'ordinary': 4, 'bastian': 1, 'atreyu': 1, 'criminals': 13, 'begin': 5, 'suspect': 5, 'informant': 3, 'simple': 9, 'jewelery': 2, 'terribly': 3, 'chevy': 14, 'renew': 1, 'nevada': 2, 'neo': 15, 'noir': 20, 'roger': 5, 'kint': 2, 'soundtrack': 5, 'avril': 1, 'lavigne': 1, 'hole': 6, 'busta': 1, 'rhymes': 1, 'uses': 26, 'own': 46, 'methods': 2, 'writes': 10, 'hardships': 4, 'ghosts': 13, 'complications': 3, 'arise': 2, 'kansas': 15, 'wicked': 5, 'whales': 3, 'remember': 21, '1930': 19, 'powell': 1, 'nick': 16, 'nora': 4, 'detective': 41, 'crimes': 4, 'animators': 2, 'classical': 11, 'leopold': 1, 'stokowski': 1, 'conducts': 1, 'orchestra': 1, 'continuation': 2, 'students': 12, 'pursuing': 1, 'singing': 22, 'stanwyck': 4, 'macmurray': 2, 'insurance': 11, 'plans': 6, 'robotic': 5, 'creatures': 34, 'enemies': 6, 'lovely': 3, 'spring': 2, 'shrubbery': 2, 'murderer': 13, 'snoopy': 3, 'learn': 18, 'meaning': 9, 'holidays': 12, 'decorating': 1, 'sad': 7, 'adults': 7, 'wasteland': 3, 'kindhearted': 1, 'storyteller': 1, 'remus': 1, 'trickster': 1, 'br': 3, 'er': 3, 'outwits': 1, 'slow': 7, 'witted': 1, 'rule': 10, 'pride': 3, 'universe': 8, 'personalities': 1, 'blurred': 1, 'dramatises': 1, 'pearl': 3, 'harbor': 2, 'mike': 29, 'myers': 17, 'jay': 17, 'roach': 3, 'crystal': 16, 'lend': 5, 'immature': 1, 'encountering': 1, 'crisis': 10, 'moved': 5, 'cross': 14, 'glamorous': 2, 'ed': 14, 'helms': 6, 'wisconsin': 2, 'disillusionment': 2, 'suffers': 7, 'horrors': 6, 'vision': 5, 'builds': 8, 'ingredients': 1, 'jude': 8, 'forest': 13, 'whitaker': 7, 'repossession': 1, 'mambo': 1, 'eric': 8, 'garcia': 1, 'return': 21, 'canadian': 10, 'deals': 20, 'experiments': 2, 'genetic': 4, 'engineering': 1, 'too': 14, 'dc': 13, 'comics': 11, 'transformation': 1, 'batman': 12, 'provides': 7, 'island': 27, 'berk': 1, 'marks': 5, 'thompson': 13, 
'attaches': 5, 'crewman': 1, 'ended': 4, 'charlie': 23, 'kareem': 3, 'abdul': 3, 'jabar': 1, 'bill': 24, 'weather': 6, 'days': 12, 'repeat': 2, 'controls': 1, 'touched': 2, 'ufo': 3, 'sighting': 1, 'fantastical': 5, 'instruct': 1, 'build': 5, 'diamond': 5, 'imprisonment': 3, 'budding': 2, 'romance': 57, 'spoiled': 2, 'cocker': 2, 'spaniel': 2, 'scruffy': 2, 'poor': 23, 'mutt': 1, 'hat': 2, 'freddie': 2, 'krueger': 5, 'descent': 2, 'becoming': 13, 'caretaker': 4, 'lots': 7, 'marshmallow': 3, 'goo': 1, 'shaving': 2, 'cream': 2, 'covered': 2, 'actors': 28, 'cassavetes': 1, 'rag': 3, 'tag': 12, 'drugs': 11, 'cats': 5, 'rabbits': 2, 'realizes': 7, 'unhappy': 5, 'needs': 10, 'bestseller': 1, 'self': 16, 'hypnosis': 3, 'hangs': 3, 'grand': 11, 'red': 31, 'freidkin': 1, 'chronicles': 20, 'possessed': 13, 'cunningham': 2, 'dirty': 7, 'politics': 5, 'segel': 10, 'blunt': 17, 'proposal': 1, 'walk': 6, 'aisle': 1, 'winslet': 19, 'passengers': 2, 'doomed': 8, 'voyage': 1, 'wudang': 1, 'swordsman': 4, 'fabled': 3, 'unlikely': 15, 'opponent': 1, 'required': 5, 'extensively': 1, 'ballerina': 11, 'soulmate': 1, 'mammoth': 2, 'sabertooth': 1, 'sloth': 2, 'saw': 6, 'godfather': 3, 'screen': 13, 'davy': 1, 'better': 6, 'dutchman': 1, 'heartbreaking': 1, 'tribulations': 3, 'chow': 11, 'yun': 11, 'fat': 16, 'affleck': 15, 'dramatizes': 1, 'secret': 33, 'iranian': 2, 'hostages': 2, 'create': 13, 'anything': 6, 'races': 4, 'ark': 1, 'priests': 2, 'tcm': 1, 'kind': 13, nan: 1, '1': 6, 'cat': 22, 'owners': 2, 'thief': 18, 'woo': 8, 'genie': 7, 'scared': 4, 'wits': 3, 'audiences': 2, 'simply': 1, 'repeating': 2, 'no': 42, 'dull': 3, 'blooms': 1, 'robots': 18, 'consumerist': 1, 'nightmare': 2, 'pearce': 4, 'uncovers': 6, 'policeman': 5, 'hollywood': 13, 'spawning': 1, 'remakes': 2, 'leatherface': 3, 'football': 13, 'stricken': 4, 'cancer': 26, 'support': 4, 'distance': 5, 'idealism': 1, 'twin': 5, 'watch': 5, 'dual': 3, 'fraternal': 1, 'wacky': 5, 'gary': 17, 'sheriff': 19, 'refuses': 4, 
'lines': 10, 'songs': 7, '1985': 20, 'dr': 23, 'emmett': 1, 'brown': 8, 'remarry': 1, 'exhusband': 1, 'behind': 16, 'vacation': 17, 'bumbling': 5, 'thieves': 6, 'stepsisters': 2, 'compete': 6, 'kris': 3, 'kristofferson': 2, 'interwoven': 1, 'redemption': 6, 'overdose': 1, 'restaurant': 6, 'stick': 2, 'reigniting': 1, 'matthew': 25, 'broderick': 13, 'senior': 3, 'cutting': 2, 'unlucky': 4, 'jinxes': 1, 'stealing': 4, 'coins': 1, 'alone': 7, 'vigilante': 3, 'mass': 6, 'jules': 3, 'verne': 2, 'vanessa': 7, 'hudgens': 6, '1969': 17, 'joesph': 1, 'gordon': 32, 'levitt': 21, 'patient': 7, 'maintaining': 1, 'sam': 31, 'obsession': 3, 'juliette': 3, 'binoche': 3, 'willem': 5, 'dafoe': 5, '2002': 31, 'soccer': 4, 'parminder': 1, 'nagra': 1, 'keira': 3, 'knightley': 2, 'idea': 7, 'lot': 12, 'mutated': 1, 'lynching': 1, 'believed': 3, 'cattle': 3, 'rustlers': 1, 'cursed': 6, 'try': 50, 'ending': 13, 'radcliffe': 9, 'special': 29, 'critically': 11, 'flopped': 1, 'gather': 2, 'information': 3, 'wiped': 2, 'population': 5, 'fathers': 5, '2005': 24, 'corpses': 2, 'oskar': 2, 'schindler': 3, 'discovering': 4, 'beliefs': 1, 'carroll': 7, 'agency': 1, 'double': 3, 'faye': 9, 'dunaway': 10, 'psychological': 19, 'convicted': 13, 'rapist': 4, 'attorney': 7, 'helps': 25, 'wahlberg': 23, 'boston': 6, 'campbell': 10, 'replaces': 1, 'chainsaw': 5, 'stripper': 8, 'burlesque': 1, 'emerges': 1, 'domineering': 2, 'perseus': 13, 'andromeda': 1, 'intervention': 1, 'darabont': 2, 'robbins': 14, 'altered': 1, 'dramatically': 1, 'seeing': 8, 'ufos': 2, 'beard': 1, 'told': 25, 'kennedy': 1, 'nobody': 3, 'eat': 6, 'elderly': 8, 'portray': 5, 'hiker': 2, 'literally': 1, 'hard': 13, 'chick': 5, 'seem': 7, 'glasses': 4, 'dress': 10, 'comicbook': 2, 'twelve': 1, 'exes': 4, 'jesse': 13, 'eisenberg': 10, 'zuckerburg': 1, 'harvard': 3, 'manager': 12, 'hogwarts': 5, 'height': 3, 'disco': 4, 'remembered': 2, 'catchy': 1, 'courtesy': 1, 'bee': 1, 'gees': 1, 'suit': 5, 'jockey': 6, 'overwhelming': 1, 'odds': 7, 
'oversized': 1, 'primate': 4, 'residing': 2, 'skull': 3, 'lusts': 1, 'fincher': 7, 'singer': 20, 'conmen': 1, 'hughes': 9, 'able': 6, 'admitted': 3, 'psychiatric': 5, 'institute': 9, 'patients': 4, 'aurthur': 1, 'round': 2, 'table': 2, 'hilarious': 10, 'scrapes': 1, 'brighter': 1, 'campy': 3, 'lycanthrope': 1, 'backward': 1, 'timeline': 1, 'dvd': 1, 'mastermind': 9, 'pawns': 5, 'scheme': 13, 'profoundly': 2, 'changing': 8, 'emilio': 4, 'estevez': 4, 'harry': 13, 'stanton': 3, 'neal': 5, 'ali': 5, 'macgraw': 2, 'fated': 5, 'scoot': 2, 'tough': 13, 'foul': 3, 'mouthed': 2, 'concoction': 1, 'delivers': 5, 'chilling': 3, 'performance': 18, 'sequestered': 1, 'innkeeper': 1, 'fagin': 1, 'ringleader': 2, 'pickpockets': 1, 'surrounding': 3, 'pow': 5, 'hell': 10, 'wasted': 1, 'horro': 1, 'roommate': 2, 'obsessed': 12, 'perform': 4, 'seasons': 1, 'fell': 5, 'wagon': 1, 'tanked': 1, 'canon': 3, 'leonard': 7, 'nemoy': 1, 'sang': 4, 'protagonists': 1, 'precursor': 1, 'accident': 15, 'memories': 16, '1992': 34, 'virginia': 1, 'madsen': 1, 'tony': 32, 'todd': 4, 'replaced': 3, 'hook': 4, 'ferrell': 27, 'galifianaki': 1, 'congressman': 3, 'incredible': 2, 'taken': 19, 'tornado': 11, 'lifetime': 3, 'six': 13, 'chipmunks': 2, 'marooned': 1, 'cruise': 32, 'egotistical': 1, 'stockbroker': 1, 'gekko': 2, 'america': 31, 'triple': 8, 'crown': 8, '1966': 13, 'cub': 14, 'simba': 14, 'grows': 15, 'freddy': 6, 'kruger': 2, 'murdering': 6, 'francois': 3, 'truffaut': 5, 'recognized': 1, 'nouvelle': 1, 'vague': 1, 'movement': 3, 'filmmakers': 1, 'focusing': 6, 'antoine': 2, 'doinel': 1, 'continent': 1, 'begins': 12, 'charlton': 13, 'heston': 13, 'act': 7, 'retire': 3, 'anonymously': 2, 'tombstone': 1, 'arizona': 2, 'disrupted': 2, 'eliminating': 1, 'acts': 5, 'air': 10, 'traffic': 1, 'controllers': 1, 'atari': 1, '2600': 1, 'spoof': 12, 'forlorn': 1, 'owner': 17, 'supply': 7, 'absorbing': 1, 'adjustments': 1, 'messy': 2, 'schooler': 2, 'promiscuous': 1, 'rumors': 1, 'beverly': 4, 'angelo': 3, 
'comically': 1, 'walley': 2, 'sociopathic': 1, 'punk': 7, 'spree': 4, 'bernardo': 3, 'bertolucci': 3, 'swept': 5, 'lones': 1, 'china': 9, 'ruler': 6, 'ethan': 11, 'hawke': 5, 'whereby': 1, 'oprah': 4, 'winfrey': 3, 'glover': 6, 'vietnam': 31, 'berenger': 3, 'familiar': 1, 'shakespear': 4, 'slightly': 1, 'silent': 20, 'tetsuo': 1, 'biker': 4, 'samberg': 4, 'preacher': 1, 'charlize': 3, 'theron': 3, 'source': 3, 'professional': 17, 'squid': 1, 'android': 2, 'sunglasses': 1, 'lemmon': 19, 'marilyn': 13, 'monroe': 13, 'funniest': 2, 'took': 24, 'pandora': 6, 'satellite': 1, 'orbiting': 2, 'alpha': 2, 'centauri': 1, 'savage': 3, 'treat': 1, 'horses': 3, 'sewell': 1, 'monstrous': 2, 'curse': 5, 'handsome': 2, 'lin': 2, 'furious': 2, 'descends': 2, 'foolishly': 1, 'disposing': 1, 'estate': 3, 'daughters': 4, 'flattery': 1, 'consequences': 3, 'penguins': 7, 'cuban': 12, 'refugee': 3, 'powerful': 22, 'ultimately': 8, 'basically': 3, 'artificial': 9, 'insemination': 2, 'mobsters': 3, 'robbing': 5, 'bank': 25, 'monkey': 4, 'suits': 4, 'revolutionary': 7, 'species': 10, 'ways': 17, '1999': 38, 'gave': 6, 'rise': 12, 'shyamalan': 4, 'endings': 1, 'subtitled': 1, 'episode': 5, 'iv': 4, 'opening': 6, 'scroll': 1, 'ratso': 1, 'rizzo': 1, 'appear': 8, 'trunk': 1, 'hallows': 1, 'public': 3, 'explode': 1, 'india': 8, 'hippie': 7, 'commune': 4, 'garrett': 1, 'hedlund': 1, 'digital': 3, 'abrams': 7, 'filming': 11, 'nina': 1, 'troubled': 17, 'dancer': 11, 'dreamy': 1, 'prevent': 6, 'importation': 1, 'heroin': 1, 'summer': 19, 'unexpectedly': 3, 'discover': 19, 'provide': 6, 'mufasa': 5, 'blond': 5, 'wields': 2, 'hammer': 6, 'earned': 11, 'screenplay': 12, 'villagers': 1, 'hire': 7, 'somewhat': 4, 'boring': 5, 'disgusting': 1, 'diarrhea': 1, '47': 3, 'presented': 1, 'spends': 4, 'entire': 4, 'until': 15, 'stumbles': 9, 'colleague': 1, 'val': 10, 'kilmer': 10, 'dwarf': 6, 'ewok': 2, 'brave': 4, 'archer': 3, 'demi': 4, 'moore': 34, 'swayze': 6, 'pottery': 1, 'open': 8, 'shop': 5, 'sir': 
13, 'conan': 1, 'doyle': 2, 'ritchie': 3, 'elwes': 6, 'buttercup': 4, 'elton': 5, 'sculptures': 1, 'almost': 12, 'location': 6, 'outside': 7, 'momma': 2, 'perspective': 4, 'wanting': 6, 'lose': 5, 'virginity': 4, 'audition': 1, 'read': 7, 'auditioning': 1, 'kenneth': 10, 'branagh': 7, 'ocd': 2, 'enchanted': 4, 'compassion': 1, 'paranormal': 5, 'footage': 13, 'abc': 2, 'network': 2, 'wears': 8, 'tie': 3, 'autobots': 6, 'decepticons': 5, 'popularized': 3, 'hello': 4, 'gorgeous': 3, 'peckinpah': 4, 'texas': 12, 'border': 2, 'nimoy': 1, 'shattner': 1, 'humpback': 1, 'compton': 1, 'casual': 2, 'elf': 2, 'dobby': 1, 'sadly': 1, 'lange': 3, 'girlfriends': 5, 'airline': 5, 'whip': 1, 'bum': 2, 'selected': 3, 'idiot': 7, 'angel': 13, 'business': 18, 'existed': 5, 'chewed': 1, 'scenery': 1, 'joan': 4, 'crawford': 2, 'wire': 2, 'coat': 3, 'hangers': 1, 'chooses': 2, 'pursue': 4, 'gritty': 8, 'hank': 3, 'azaria': 3, 'portal': 3, 'terrorizes': 10, 'hanger': 1, 'avoiding': 3, 'hunters': 6, 'target': 6, 'hiding': 6, 'carrie': 4, 'bradshaw': 1, 'retirement': 4, 'slim': 2, 'pickens': 2, 'airplane': 5, 'hacker': 7, 'fishburn': 1, 'morpheus': 1, '2013': 3, 'included': 8, 'seyfriend': 1, 'revolution': 6, 'utters': 2, 'gonna': 4, 'bigger': 4, 'attacks': 4, 'kept': 6, 'revolutionized': 1, 'force': 14, 'portrayal': 10, 'britain': 7, 'monarch': 3, 'spain': 7, 'painful': 2, 'stunts': 5, 'outer': 9, 'horcruxes': 3, 'existence': 5, 'objects': 3, 'wizarding': 2, 'patriotic': 2, 'clean': 9, 'poker': 1, 'robbery': 7, 'brainerd': 1, 'position': 1, 'gory': 2, 'assault': 5, 'omaha': 1, 'beach': 10, 'asleep': 4, 'distinction': 2, 'kenya': 1, 'introduces': 4, 'concepts': 1, 'xenomorphs': 1, 'facehuggers': 1, 'corporate': 7, 'aliases': 1, 'colors': 6, 'ie': 1, 'mr': 17, 'pink': 6, 'mostly': 9, 'later': 20, 'odessa': 2, 'steps': 3, 'sequence': 4, 'odysseus': 1, 'hick': 1, 'follow': 14, 'parent': 3, 'footprints': 1, 'porn': 4, 'overeating': 1, 'vince': 7, 'vaughn': 10, 'catches': 6, 'claim': 7, 
'youngest': 1, 'traci': 2, 'officers': 8, 'munchkins': 3, 'gere': 10, 'don': 10, 'cheadle': 2, 'fuqua': 1, 'hulce': 5, 'demons': 4, 'creating': 4, 'masterpieces': 1, 'supervillian': 2, 'defeated': 3, 'call': 10, 'duty': 3, 'distributed': 2, 'number': 14, 'imdb': 2, '250': 1, 'maritla': 1, 'investigator': 14, 'publishing': 2, 'kesey': 2, 'questionable': 2, 'treatment': 3, 'unfit': 1, 'graduates': 1, 'spend': 4, 'cruising': 1, 'buddies': 6, 'heading': 1, 'dancers': 9, 'ogre': 14, 'hippies': 2, 'partied': 1, 'pattinson': 14, 'veterinarian': 3, 'circus': 13, 'split': 3, 'farrel': 1, 'smiling': 1, 'weimar': 1, 'republic': 4, 'gas': 2, 'aang': 3, 'overthrow': 4, 'peace': 3, 'fate': 9, 'manage': 3, 'p': 6, 'join': 6, 'forces': 23, 'smugglers': 2, 'conclusion': 3, 'novels': 13, 'tolkein': 4, 'rogers': 6, 'rowan': 4, 'atkinson': 4, 'pokes': 3, 'emile': 3, 'hirsch': 2, 'olivia': 7, 'thirlby': 2, 'gorak': 1, 'hoth': 2, 'frozen': 6, 'cave': 3, 'ceiling': 1, 'flee': 2, 'bi': 1, 'swoops': 1, 'sellers': 7, 'wibberley': 1, 'tramp': 2, 'yukon': 1, 'eating': 14, 'shoe': 2, 'attends': 1, 'party': 22, '2007': 15, 'gone': 17, '1976': 20, 'numerical': 5, 'spartan': 1, 'persians': 1, 'capital': 2, 'brainwashed': 3, 'gulf': 3, 'vice': 1, 'directing': 3, '1991': 32, 'rehearse': 1, 'lyrics': 2, 'hills': 2, 'alive': 13, 'kay': 2, 'hapless': 12, 'carnival': 3, 'performer': 4, 'caught': 11, 'tarentino': 5, 'nicknames': 2, 'pairs': 6, 'grifters': 2, 'caper': 2, 'helped': 13, 'chases': 3, 'contaminate': 1, 'fort': 1, 'knox': 1, 'contents': 1, 'preston': 2, 'minutes': 4, 'bluff': 1, 'spectacular': 2, 'mothership': 1, 'hear': 5, 'scream': 2, 'lovelorn': 1, 'monolith': 2, 'mandy': 8, 'krypton': 2, 'doc': 4, 'happens': 6, 'holed': 2, 'warehouse': 1, 'duvall': 4, 'confess': 1, 'spike': 20, 'direct': 4, 'rights': 8, 'iteration': 3, 'although': 8, 'hannibal': 7, 'cannibal': 2, 'thomas': 15, 'mcbride': 2, 'exodus': 3, 'cleese': 3, 'jewel': 2, 'filibustering': 2, 'hours': 5, 'wrongly': 5, 'nickelsen': 1, 
'adultery': 2, 'mythological': 2, 'banished': 2, 'stuffed': 7, 'astronaut': 6, 'simians': 2, 'troll': 2, 'granter': 1, 'parrot': 1, 'quirky': 7, 'embroiled': 1, 'odd': 5, 'travis': 3, 'bickle': 2, 'imitated': 1, 'outcast': 4, 'jungle': 15, 'pull': 3, 'cons': 2, 'golightly': 5, 'socialite': 5, 'interested': 2, 'former': 46, 'assassin': 19, 'gunned': 2, 'youtube': 2, 'age': 20, 'we': 16, 'neighbor': 13, 'aired': 3, 'jumpsuit': 1, 'dons': 1, 'painted': 1, 'zero': 1, 'mostel': 1, 'springtime': 1, 'hitler': 5, 'germany': 7, 'chocolates': 3, 'wealthy': 14, 'france': 13, 'stuff': 3, 'hamilton': 7, 'ecstatic': 1, 'considering': 1, '1963': 6, 'escaping': 8, 'rooted': 1, 'writings': 4, '20': 9, 'romano': 2, 'leguizamo': 2, 'denis': 2, 'leary': 2, 'takeoff': 1, 'disaster': 10, 'screwball': 13, 'nba': 2, 'jabaar': 1, 'appearances': 2, 'aboard': 12, 'flight': 6, 'awry': 4, 'nerdy': 5, 'escaped': 11, 'commit': 5, 'ward': 8, 'speaking': 6, 'hinders': 1, 'politician': 12, 'raimi': 4, 'quintessential': 3, 'feral': 1, 'adulthood': 2, 'rejects': 2, 'civilized': 1, 'voorhes': 1, 'sexy': 3, 'venture': 2, 'amazing': 9, 'capra': 10, 'cendrillon': 1, 'perrault': 1, 'camera': 9, 'volunteers': 2, 'tearjerker': 3, 'delorean': 3, 'neve': 3, 'terrorized': 3, 'ghostface': 6, 'wore': 5, 'cape': 1, 'fly': 9, 'bowling': 1, 'smoking': 2, 'joints': 2, 'drinking': 3, 'russians': 3, 'dessert': 3, 'intentioned': 1, 'australian': 12, 'elite': 7, 'operative': 3, 'unbeknownst': 1, 'technologically': 1, 'advanced': 1, 'e': 19, 'survival': 7, 'practically': 1, 'moab': 1, 'desert': 13, 'ten': 5, 'memorable': 11, 'protects': 3, 'ancestors': 1, 'sasha': 7, 'baron': 11, 'cohen': 14, 'shines': 1, 'ousted': 1, 'eastern': 4, 'challenged': 2, 'ocean': 12, 'sinks': 10, 'rats': 2, 'commands': 1, 'bidding': 1, 'shoot': 11, 'pulled': 1, 'machine': 10, 'carpet': 3, 'inept': 5, 'burglars': 2, 'met': 4, 'match': 5, 'macaulay': 11, 'culkin': 11, 'sized': 1, 'helper': 1, 'satirical': 5, 'gun': 10, 'culture': 4, 'trade': 6, 
'center': 4, 'pentagon': 1, 'albie': 1, 'nervous': 2, 'banks': 9, 'depth': 2, 'affects': 3, 'industrial': 3, 'usa': 5, 'occurrences': 1, 'tape': 2, 'appointed': 4, 'fill': 1, 'vacancy': 1, 'senate': 1, 'carl': 6, 'fredrickson': 1, '82': 1, 'nd': 5, 'segal': 5, 'predominantly': 1, 'ann': 3, 'arbor': 1, 'michigan': 3, 'university': 4, 'sights': 2, 'rwanda': 1, 'naturalist': 1, 'dian': 1, 'fossey': 2, 'trouble': 14, 'actions': 2, 'cause': 4, 'rumpelstiltskin': 3, 'kingdom': 8, 'dispatch': 1, 'warring': 4, 'apaches': 2, 'haruki': 1, 'miyzaki': 1, 'boar': 2, 'maggie': 6, 'bloodthirsty': 2, 'farmhouse': 2, 'catherine': 11, 'hicks': 1, 'developed': 5, 'status': 2, 'gwyneth': 10, 'midst': 4, 'racehorse': 8, 'slight': 1, 'limp': 2, 'superior': 3, 'han': 7, 'solo': 8, 'statham': 15, 'aid': 2, 'choose': 7, 'toys': 15, 'scarlet': 9, 'labute': 1, 'aaron': 6, 'eckhart': 3, 'nation': 7, 'christophe': 1, 'waltz': 3, 'foxx': 13, 'teamed': 2, 'wachowski': 5, 'laurence': 9, 'fishburne': 7, 'supporting': 20, 'acress': 1, 'olive': 1, 'guidance': 3, 'counselor': 4, 'extract': 3, 'organs': 4, 'paid': 3, 'share': 8, 'milla': 3, 'jolovich': 1, 'alice': 11, 'corporation': 4, 'supervisors': 1, 'san': 8, 'francisco': 6, 'idaho': 1, 'nerd': 3, 'immigrant': 7, 'pedro': 2, 'psychopaths': 1, 'paths': 1, 'mcguyver': 1, 'mcfadden': 1, 'slave': 15, 'subsequently': 2, 'emperor': 7, 'irving': 5, 'ruby': 2, 'slippers': 3, 'cousin': 3, 'freed': 3, 'nolan': 11, 'joseph': 27, 'condon': 3, 'shadows': 1, 'gotham': 9, 'bane': 8, 'reveal': 3, 'barrett': 3, 'cavilleri': 2, 'chemistry': 1, 'deny': 1, 'ignore': 1, 'tatou': 1, 'shy': 3, 'waitress': 8, 'nicer': 1, 'quention': 1, 'boomers': 1, 'meeting': 13, 'funeral': 5, 'spending': 6, 'weekend': 8, 'partying': 1, 'chloe': 2, 'griffiths': 1, 'examines': 4, 'distinct': 1, 'angles': 1, 'relate': 2, 'incident': 1, 'points': 5, 'fugitive': 4, 'false': 4, 'ids': 1, 'weapons': 6, 'apprentice': 11, 'mouse': 11, 'judge': 9, 'esteemed': 1, 'reviewing': 1, 'committing': 5, 
'reorganizes': 1, 'convince': 7, 'superheros': 7, 'wallace': 10, 'scotland': 6, 'oppression': 1, 'costs': 1, 'principal': 3, 'hayao': 8, 'trek': 3, 'wally': 2, 'described': 3, 'dialogue': 4, 'mongrel': 1, 'labrador': 1, 'inferior': 1, 'monkeys': 5, 'brolin': 5, 'mindbending': 1, 'puerto': 2, 'bemused': 1, 'kemp': 1, 'tenma': 1, 'royale': 2, 'brutally': 4, 'backdrop': 4, 'vehicle': 4, 'recently': 13, 'collin': 1, 'farrell': 7, '1983': 16, 'debra': 4, 'winger': 4, 'sacrificed': 1, 'employed': 2, 'norwegian': 1, 'snowboard': 1, 'surprisingly': 1, 'non': 7, 'gives': 13, 'instruction': 1, 'randomly': 1, 'televised': 3, 'freakish': 1, 'firefly': 3, 'directorial': 5, 'bateman': 16, 'brian': 11, 'bale': 15, 'murderous': 5, 'wall': 8, 'brett': 1, 'easton': 1, 'ellis': 2, 'lea': 3, 'invading': 4, 'terminal': 2, 'lung': 2, 'disease': 8, 'befriended': 1, 'kindly': 1, 'protective': 2, 'klaatu': 4, 'gort': 1, 'farmer': 12, 'joins': 11, 'guerrilla': 4, 'shrek': 11, 'reminds': 1, 'zorro': 2, 'abbott': 1, 'senators': 1, 'pact': 2, 'pennant': 1, 'historic': 7, 'figure': 11, 'peaceful': 2, 'teachings': 1, 'moody': 2, 'olympic': 4, 'runners': 2, 'religious': 5, 'backgrounds': 1, 'sport': 11, 'failing': 2, 'ditch': 3, 'generated': 3, 'statistics': 1, 'unbeatable': 1, 'ties': 3, 'balloons': 9, 'lifelong': 4, 'south': 27, 'riding': 11, 'locke': 1, 'revolved': 3, 'bebe': 2, 'ebenezer': 5, 'scrooge': 8, 'december': 4, 'hybrid': 5, 'fanfare': 1, 'amour': 1, 'garry': 3, 'armed': 4, 'everywhere': 3, 'elliott': 1, 'plastic': 5, 'surgeon': 5, 'rely': 1, 'cover': 5, 'lie': 4, 'westley': 2, 'mcavoy': 10, 'working': 38, '19': 4, 'gregory': 17, 'peck': 17, 'entangled': 2, 'bitter': 3, 'feud': 1, 'internet': 4, 'ecological': 1, 'disappearing': 3, 'marvels': 1, 'aquatic': 3, 'maya': 11, 'rudolph': 10, 'newly': 9, 'sports': 37, 'shouts': 1, 'mediterranean': 1, 'near': 12, 'italy': 15, 'cars': 11, 'vin': 7, 'diesel': 7, 'dad': 11, 'buy': 2, 'grifter': 1, 'tout': 1, 'advantage': 2, 'fortuitous': 1, 
'circumstances': 3, 'wrestling': 1, 'promoter': 2, 'kenau': 2, 'slowed': 1, 'moon': 7, 'scorsesi': 1, 'combined': 3, 'talents': 2, 'sorvino': 1, 'happy': 6, 'lucky': 5, 'honest': 3, 'interacts': 2, 'puts': 11, 'antarctica': 2, 'overdoses': 2, 'illegal': 5, 'squeal': 1, 'kaye': 3, 'inn': 1, 'vermont': 1, 'pill': 12, 'increases': 1, 'intelligence': 9, 'chernobyl': 1, 'amity': 7, 'forcing': 2, 'hates': 4, 'rescuing': 3, 'forte': 4, 'wolfgang': 2, 'amadeus': 2, 'mozart': 6, 'berridge': 2, 'indianna': 1, 'alter': 5, 'ego': 5, 'noam': 1, 'chomsky': 1, 'sides': 6, 'abortion': 1, 'debate': 1, 'psychologist': 3, 'cybertronian': 2, 'spacecraft': 2, 'reach': 1, 'daycare': 2, 'brink': 2, 'worlds': 2, 'safe': 2, 'affairs': 2, 'contract': 4, 'unable': 3, 'dates': 1, 'affair': 21, 'tasked': 10, 'scheming': 2, 'duping': 1, 'company': 29, 'cahoots': 1, '1944': 8, 'array': 1, 'designated': 1, '1945': 9, 'brooklyn': 15, 'tenement': 2, 'stuffing': 1, 'inside': 10, 'freezing': 1, 'astro': 1, 'modeled': 1, 'belle': 7, 'conner': 3, 'rookie': 3, 'veteran': 11, 'modus': 2, 'operandi': 2, 'vi': 6, 'details': 5, 'kings': 3, 'ordeal': 1, 'conquer': 2, 'incessant': 2, 'stammer': 1, 'quietly': 1, 'estranged': 4, 'decade': 2, 'braff': 1, '65': 2, 'merlin': 7, 'descendant': 3, 'krige': 1, 'unresisting': 2, 'reardon': 1, 'involvement': 3, 'kitty': 3, 'collins': 7, 'mazursky': 2, 'jill': 5, 'clayburgh': 3, 'use': 23, 'struggle': 10, 'freedom': 12, 'sunken': 2, '1956': 14, 'inhabit': 2, 'citizens': 3, 'bodies': 10, 'fraternity': 4, 'cukor': 4, 'remarriage': 1, 'alternate': 10, 'pertaining': 1, 'oldman': 2, 'mila': 15, 'kunis': 15, 'ridiculous': 2, 'pranks': 2, 'laughs': 5, 'expense': 1, 'pain': 1, 'bilbo': 6, 'baggins': 5, 'gandalf': 5, 'thorin': 1, 'defeat': 13, 'dragon': 7, 'smaug': 1, 'relive': 3, 'ensure': 2, 'jacob': 2, 'spaceship': 9, 'map': 4, 'discovered': 7, 'artifacts': 2, 'cultures': 1, 'stumble': 4, 'distant': 4, 'extinction': 1, 'walter': 11, 'brennan': 1, 'hillbilly': 2, 
'sharpshooter': 2, 'sudeikis': 4, 'rid': 5, 'employers': 10, 'studded': 6, 'effectively': 1, 'warn': 1, 'destruction': 9, 'rapid': 2, 'arenas': 1, 'dinosaur': 5, 'themed': 7, 'pro': 4, 'piccolo': 1, 'nolte': 5, 'frankie': 1, 'madison': 1, 'expecting': 4, 'partner': 12, 'bootlegging': 2, 'prepare': 2, 'laugh': 2, 'robber': 4, 'feelings': 3, 'robbed': 1, 'instead': 16, 'scandal': 6, 'feel': 8, 'undersized': 2, 'toby': 2, 'maquire': 2, 'atlanta': 1, 'burns': 3, 'onslaught': 2, 'faces': 7, 'tremendous': 1, 'jedi': 5, 'durante': 1, 'mail': 1, 'narrator': 1, 'frees': 2, 'vintage': 2, 'highrise': 1, 'exciting': 3, 'front': 6, 'thirties': 1, 'capcom': 1, 'dressed': 5, 'monthly': 1, 'event': 3, 'fellow': 6, 'executives': 1, 'cusak': 2, 'brazilian': 5, 'serious': 7, 'inability': 1, 'speak': 7, 'victor': 5, 'hugo': 3, 'hugh': 11, 'jackman': 6, 'candlestick': 1, 'heroine': 4, 'bonds': 3, 'loyalty': 2, 'tested': 2, 'dario': 5, 'argento': 5, 'attending': 1, 'recite': 1, 'thy': 1, '40': 13, 'percent': 5, 'photo': 2, 'realistic': 2, 'meredith': 1, 'correspondent': 1, 'ernie': 1, 'pyle': 2, '18': 3, 'infantry': 1, 'shoes': 3, 'vacated': 1, 'petula': 1, 'collect': 1, 'mony': 1, 'python': 3, 'terry': 15, 'gilliam': 10, 'pilots': 4, 'communicate': 4, 'favreau': 7, 'region': 2, 'sorrowful': 1, 'laying': 2, 'wheelbarrow': 1, 'differently': 1, 'unique': 10, 'researchers': 2, 'secretary': 3, 'theif': 1, 'dropped': 2, 'buddy': 15, 'canyon': 2, 'humfrey': 1, 'supposed': 9, 'convenience': 4, 'staple': 2, 'daddy': 6, 'mist': 1, 'plotting': 3, 'humanoids': 2, 'harmony': 1, 'worship': 1, 'goddess': 1, 'eywa': 1, 'addresses': 1, 'social': 17, 'class': 22, 'focussing': 1, 'frenchman': 1, 'tribesman': 1, 'massacre': 1, 'beethoven': 2, 'section': 3, 'hate': 8, 'bigotry': 1, 'explodes': 2, 'overweight': 5, 'advice': 3, 'harvey': 10, 'embellished': 1, 'scar': 5, 'right': 17, 'burger': 1, 'dazzles': 1, 'hearts': 1, 'minds': 1, 'darth': 9, 'vader': 8, 'results': 3, 'stylized': 1, 'forgot': 1, 'cosner': 
2, 'hears': 4, 'corn': 4, 'swap': 4, 'renolds': 1, 'kimble': 1, 'battling': 10, 'greater': 3, 'basis': 3, 'yul': 7, 'brynner': 4, 'blasts': 1, 'unrecognizable': 1, 'apes': 8, 'amusement': 4, 'blast': 1, '1934': 9, 'spanish': 4, 'inquisition': 1, 'coppola': 21, 'origins': 4, 'vito': 1, 'corleone': 3, 'sun': 2, 'cell': 1, 'phones': 1, 'cameras': 5, 'boys': 5, 'preparation': 2, 'spent': 6, 'bedtime': 1, 'amnesiac': 1, 'ludlum': 5, 'introducing': 2, 'daily': 3, 'eskimo': 1, 'screams': 1, 'used': 16, 'nonhumans': 1, 'fake': 8, 'highway': 1, '66': 3, 'effected': 1, 'problems': 9, 'angelina': 16, 'jolie': 17, 'transporting': 2, 'nitroglycerin': 1, 'outfit': 3, 'smurf': 1, 'cupcake': 1, 'east': 6, 'hits': 3, 'theseus': 2, 'zeus': 10, 'hyperion': 1, 'poe': 5, 'mafia': 11, 'beasley': 1, 'di': 12, 'caprio': 11, 'crashes': 5, 'retiring': 3, 'bag': 2, 'million': 6, 'dollars': 4, 'pirate': 11, 'cycling': 1, 'puppets': 3, 'kermit': 4, 'miss': 2, 'piggy': 2, 'concept': 2, 'considerably': 1, 'gruesome': 4, 'teeth': 1, 'flesh': 7, 'pus': 1, 'sores': 1, 'list': 4, 'skinned': 3, 'felines': 2, 'nevus': 1, 'mode': 2, 'transportation': 2, 'risk': 1, 'starvation': 1, 'terrible': 8, 'cannibalistic': 3, 'rural': 4, 'areas': 2, 'expose': 4, 'fianc': 2, 'pullman': 1, 'heroes': 11, 'autobiography': 4, 'ages': 4, 'present': 10, 'maturing': 1, 'pip': 2, 'christina': 8, 'aguilar': 1, 'chorus': 1, 'aguilera': 5, 'owned': 1, 'mississippi': 1, 'mixed': 11, 'minor': 12, 'telekinesis': 4, 'wiseguy': 1, 'pileggi': 1, 'tattoo': 3, 'assassinating': 1, 'largest': 3, 'ladd': 2, 'gunslinger': 4, 'tires': 1, 'range': 2, 'oda': 1, 'mae': 1, 'whoopi': 9, 'goldberg': 10, 'admirer': 1, 'hers': 2, 'despise': 1, 'jobs': 5, 'drummers': 3, 'johan': 2, 'venue': 1, 'concert': 15, 'slapstick': 3, '1958': 8, 'attributes': 1, 'na': 2, 'ruffalo': 3, 'clash': 2, 'penny': 2, 'grandson': 3, 'heaven': 3, 'touches': 1, 'inspires': 1, 'contact': 5, 'travelers': 2, 'monique': 1, 'abusive': 7, 'grossly': 1, 'obese': 2, 
'illiterate': 3, 'arm': 9, 'surfing': 4, 'pfizer': 2, 'viagra': 3, 'challenges': 2, 'congress': 3, 'exhausting': 1, 'filibuster': 2, 'speech': 10, 'champ': 1, 'interferes': 1, 'stephenie': 2, 'meyer': 4, 'filmd': 1, 'assigned': 2, 'dozen': 2, 'murderers': 2, 'assassination': 6, 'shakespearean': 10, 'bickering': 1, 'hakuna': 4, 'matata': 4, 'tonight': 1, 'bio': 1, 'kissed': 2, 'underdog': 5, 'hawaii': 4, 'kiss': 6, 'rolling': 1, 'surf': 1, 'honolulu': 1, 'sinatra': 1, 'angela': 8, 'felissa': 1, 'shamalan': 2, 'psychiatrist': 2, 'haley': 7, 'osment': 4, 'flew': 1, 'thornhill': 1, 'chased': 8, 'scotsman': 3, 'independence': 2, 'pancakes': 1, 'fabian': 1, 'ask': 4, 'sandberg': 1, 'waste': 3, 'collecting': 2, 'embarks': 3, 'die': 13, 'solving': 2, 'project': 8, 'endangers': 1, 'tokyo': 9, 'rampaging': 2, 'psionic': 1, 'raucous': 1, 'bachelor': 7, 'asia': 2, 'shootout': 3, '78': 2, 'tying': 1, 'disappearance': 5, 'happenings': 6, 'plaguing': 1, 'suburban': 5, 'strict': 3, 'ninja': 2, 'woodie': 1, 'doll': 10, 'buzzlightyear': 1, 'longer': 5, 'currency': 1, 'eddy': 1, 'exotic': 4, 'superstar': 1, 'dwelling': 1, '1917': 1, 'photograph': 2, 'scientific': 1, 'evidence': 3, 'fairies': 2, 'danish': 1, 'marries': 6, 'piglet': 2, 'alec': 4, 'guiness': 1, 'dimensional': 1, 'ferguson': 4, 'kane': 2, 'hodder': 1, 'word': 9, 'cabin': 6, 'destinies': 1, 'tried': 4, 'romantically': 2, 'wesley': 4, 'snipes': 3, 'ib': 1, 'joy': 1, 'least': 6, 'wanted': 12, 'acadamy': 1, 'pizza': 7, 'delivery': 6, 'driver': 8, 'gyllenhal': 2, 'develop': 5, 'complex': 2, 'intimate': 1, 'incompatible': 1, 'abandoned': 4, 'ringing': 1, 'bells': 1, 'transforms': 4, 'shaggy': 1, 'jodie': 12, 'foster': 14, 'montagues': 1, 'capulets': 1, 'ticket': 5, 'sweet': 4, 'dances': 3, 'poseidon': 1, 'lightning': 6, 'bolt': 1, 'entry': 4, 'heroic': 5, 'la': 8, 'karaoke': 2, 'clownfish': 6, 'zoe': 7, 'saldana': 7, 'ren': 8, 'zellweger': 8, 'worker': 16, 'situation': 5, 'expected': 4, 'celebrated': 4, 'woos': 1, 'conceives': 
1, 'treasury': 1, 'zachary': 5, 'levy': 1, 'volatile': 2, 'vickie': 1, 'novelist': 1, 'graham': 2, 'greene': 2, '1949': 7, '8': 7, 'poison': 2, 'charming': 10, 'relationships': 5, 'wholesale': 2, 'uncover': 8, 'buried': 4, 'raised': 9, 'siezes': 1, 'kung': 10, 'fu': 11, 'lansbury': 4, 'paige': 2, 'hara': 6, 'robby': 2, 'benson': 1, 'sacha': 4, 'tyrant': 3, 'rafting': 2, 'marked': 5, 'collaboration': 3, 'christoph': 2, 'raindrops': 1, 'dicken': 3, 'stingy': 2, 'fenway': 1, 'ugly': 4, 'unintelligent': 1, 'moments': 3, 'jenny': 2, 'eludes': 2, 'ago': 7, 'activities': 4, 'mobster': 8, 'associates': 1, 'tagline': 4, 'belong': 1, 'wishes': 9, 'navigate': 3, 'desolate': 3, 'grotesque': 1, 'trust': 2, 'famed': 10, 'activity': 2, 'debut': 11, 'names': 5, 'distress': 1, 'gabriel': 4, 'burne': 1, 'crossing': 2, 'assumable': 1, 'covers': 6, 'voyages': 1, 'explorers': 2, 'dispatched': 1, 'weyland': 1, 'stieg': 1, 'larsson': 1, 'disgraced': 1, 'mikael': 1, 'blomkvist': 1, 'investigates': 6, 'patriarch': 5, 'niece': 4, 'critics': 4, 'claimed': 2, 'uncomfortable': 1, 'overt': 1, 'homosexual': 4, 'themes': 2, 'piano': 4, 'revolving': 2, 'explores': 7, 'method': 1, 'chronicling': 3, 'naturally': 1, 'widely': 2, 'inaccurately': 1, 'depicted': 11, 'jumanji': 1, 'softer': 1, 'pale': 2, 'ricci': 1, 'headless': 1, 'horseman': 1, 'entrepreneur': 1, 'caribbean': 2, 'attraction': 1, 'floats': 2, 'ton': 2, 'attached': 1, 'bar': 2, 'morocco': 4, 'konami': 1, 'ahab': 2, 'crib': 1, 'macchio': 5, 'wax': 5, 'miyagi': 2, 'ebsen': 2, 'switched': 4, 'exit': 1, 'allergic': 1, 'reaction': 2, 'costume': 5, 'ginger': 1, 'cole': 2, 'porter': 1, 'comeback': 6, 'prisoner': 5, 'jailer': 1, 'conflict': 5, 'schizophrenia': 3, 'gigantic': 3, 'deaths': 3, 'warfare': 2, 'algiers': 1, 'algeria': 1, 'gettng': 1, 'beds': 1, 'scare': 5, 'addition': 3, 'mounted': 1, 'sequences': 1, 'burstyn': 1, 'every': 15, 'fitted': 2, 'prosthetics': 1, 'necks': 1, 'emaciated': 1, 'pound': 2, 'nine': 5, 'wigs': 1, 'snl': 3, 
'kristin': 5, 'breakdown': 3, 'invention': 1, 'princeton': 1, 'spaghetti': 5, 'lean': 6, 'rousing': 2, 'toole': 9, 'arabia': 3, 'arab': 2, 'tribes': 2, 'turks': 1, 'fired': 3, 'starts': 15, 'dating': 4, 'cute': 4, 'chapter': 1, 'werewolf': 3, 'agreement': 1, 'wives': 6, 'committed': 9, 'bridal': 1, 'hans': 2, 'zimmer': 1, 'rza': 5, 'quiet': 3, 'drawn': 5, 'runaway': 7, 'christin': 1, 'saturday': 7, 'cigarettes': 1, 'unborn': 2, 'intense': 12, 'neighbors': 5, 'morpurgo': 1, 'curtiz': 5, 'technicolor': 4, 'nyc': 11, 'narcotics': 2, 'bureau': 2, 'onto': 5, 'katniss': 1, 'everdeen': 1, 'seth': 24, 'rogen': 10, 'screewriter': 1, 'reiser': 1, 'baruchel': 8, 'today': 2, 'upset': 2, 'piss': 1, 'parting': 1, 'austin': 1, 'waiting': 1, 'inspiring': 3, 'fab': 2, 'banned': 8, 'care': 12, 'type': 14, 'shreck': 1, 'antonia': 1, 'baderez': 1, 'wielding': 5, 'helena': 3, 'bonham': 2, 'carter': 5, 'insomnia': 1, 'maker': 5, 'jodi': 1, 'bombed': 2, 'missed': 1, 'crazies': 1, 'mtv': 2, 'knoxville': 2, 'tenth': 1, 'anniversary': 2, 'exacts': 1, 'prosecuting': 1, 'renowned': 3, 'rapper': 3, 'funky': 1, 'posing': 2, 'captures': 1, 'makers': 1, 'borat': 1, 'reclaim': 6, 'spot': 2, 'suess': 5, 'dredd': 1, 'bella': 3, 'coven': 1, 'engage': 5, 'madcap': 3, 'hijinks': 2, 'hall': 2, 'int': 1, 'whimsical': 3, 'balloon': 2, 'lifted': 1, 'earn': 4, '$': 1, '100': 10, 'kidnapper': 1, 'dunham': 1, 'ventriloquist': 1, 'debbie': 1, 'hilary': 5, 'swank': 8, 'accompanied': 1, 'zither': 1, 'dunbar': 2, 'intolerable': 1, 'aberration': 1, 'forth': 1, 'ny': 3, 'fran': 2, 'nic': 2, 'teutonic': 2, 'jarmusch': 3, 'zen': 1, 'saucer': 1, 'intent': 3, 'warning': 1, 'curb': 1, 'aggressions': 1, 'reveals': 2, 'jk': 3, 'persuade': 2, 'makeshift': 1, 'glory': 4, 'oddball': 1, 'psychopathic': 1, 'psychedelic': 2, 'escapades': 2, 'whats': 6, 'hypocritical': 1, 'swinging': 2, 'raising': 6, 'satiric': 1, 'elements': 5, 'bible': 8, 'quoting': 1, 'wash': 2, 'miguel': 1, 'arteta': 1, 'moscow': 2, 'via': 1, 'kidnaps': 3, 
'torments': 2, 'philosophy': 3, 'arabian': 4, 'lamp': 3, 'yrs': 1, 'julianne': 9, 'glamorized': 1, 'unimpressionable': 1, 'frankly': 8, 'dear': 7, 'damn': 8, 'orphans': 2, 'mustafa': 1, 'selina': 1, 'gomez': 4, 'cassidy': 3, 'leighton': 3, 'meester': 3, 'summoned': 2, 'skeptic': 1, 'grad': 1, 'researching': 2, 'interment': 1, 'camps': 2, 'infected': 1, 'dashiell': 1, 'hammett': 1, 'eye': 14, 'spade': 3, 'lebowski': 1, 'slacker': 2, 'avid': 1, 'bowler': 1, 'philly': 1, 'apollo': 3, 'creed': 3, 'weight': 4, 'interconnected': 2, 'niven': 2, 'quinn': 3, 'defends': 7, 'raping': 2, 'montana': 4, 'aidan': 1, 'members': 10, 'ludlow': 1, 'thors': 1, 'weapon': 2, 'recovered': 1, 'lonely': 6, 'hidden': 7, 'sharni': 1, 'vincent': 3, 'hip': 6, 'hop': 6, 'competed': 1, 'natascha': 1, 'mcelhone': 1, 'asked': 6, 'pavel': 1, 'antipov': 1, 'pasha': 1, 'surprised': 3, 'offered': 2, '1955': 6, 'sergio': 11, 'leone': 10, 'robards': 1, 'cheyenne': 1, 'abilities': 9, 'higher': 1, 'standing': 5, 'frederic': 1, 'forrest': 2, 'warm': 1, 'olivier': 4, 'hawk': 1, 'adaptations': 1, 'encouraged': 2, 'vicious': 7, 'cartel': 8, 'partially': 3, 'letter': 11, 'victorian': 2, 'miser': 4, 'rebel': 5, 'suceed': 1, 'pursuits': 1, 'bodham': 1, 'pods': 3, 'fro': 1, 'sullivan': 2, 'keller': 3, 'bancroft': 4, 'patty': 2, 'duke': 3, 'jory': 1, 'caterpillars': 1, 'mischievous': 2, 'normal': 3, 'sentimental': 1, 'raft': 2, 'hungry': 3, 'procreate': 1, 'seventeen': 1, 'aristocrat': 2, 'claimant': 1, 'luxurious': 2, 'titanic': 3, 'brandishes': 2, 'mighty': 2, 'experiences': 7, 'unexplainable': 1, 'terror': 6, 'example': 2, 'voorhees': 2, 'figures': 3, 'bride': 8, 'waking': 1, 'coma': 5, 'bent': 2, 'sheepdogs': 2, 'herd': 1, 'hoggett': 2, 'rodriguez': 7, 'slashes': 1, 'corruption': 5, 'megan': 4, 'awarded': 2, 'cynically': 2, 'selfish': 1, 'bearing': 3, 'lessons': 8, 'delivered': 1, 'asks': 8, 'grown': 6, 'naked': 1, 'horribly': 5, 'established': 4, 'trademark': 1, 'idolize': 3, '1936': 5, 'tornados': 1, 
'metro': 3, 'goldwyn': 4, 'mayer': 2, 'vera': 1, 'rosemary': 3, 'oh': 2, 'tap': 3, 'libyans': 1, 'flux': 1, 'capacitors': 1, 'loreans': 1, 'skateboards': 1, 'touching': 1, 'impossible': 2, 'bizarre': 4, 'llama': 2, 'joyce': 1, 'deed': 1, 'sentence': 5, 'dwayne': 8, 'johnson': 15, 'retells': 5, 'pranking': 1, 'ni': 2, 'celine': 1, 'dion': 1, 'childless': 3, 'bury': 1, 'backyard': 2, 'containing': 1, 'infant': 1, 'shortage': 1, 'budget': 11, 'arnie': 1, 'enjoys': 1, 'honey': 5, 'bailey': 7, 'rediscovers': 1, 'beauty': 7, 'already': 1, 'deer': 9, 'passing': 1, 'hitchhock': 1, 'unforgettable': 3, 'steals': 8, 'employer': 4, 'client': 3, 'cut': 5, 'relies': 1, 'rumor': 4, 'mill': 3, 'advance': 2, 'botched': 2, 'horrifically': 1, 'eyes': 3, 'blessed': 1, 'reel': 1, 'showman': 2, 'undergo': 2, 'procedure': 2, 'erase': 1, 'sour': 1, 'curry': 7, 'sarrandon': 1, 'lazy': 2, 'tired': 4, 'substance': 3, 'abuse': 4, 'drifter': 2, 'agrees': 4, 'stands': 2, 'inheritance': 2, 'lawn': 5, 'ornaments': 1, 'friendship': 12, 'knowing': 3, 'bikini': 3, 'extras': 2, 'graduation': 2, 'ceremony': 4, 'thirsty': 2, 'crawls': 1, 'promotion': 1, 'gathering': 2, 'purpose': 2, 'mocking': 1, 'whatever': 2, 'fools': 2, 'maniac': 2, 'superheroes': 7, 'asgardian': 2, 'levit': 1, 'dictator': 6, 'climbing': 3, 'eiffel': 2, 'twice': 3, 'quick': 2, 'overselling': 1, 'intentionally': 2, 'lesser': 2, 'segment': 2, 'walrus': 2, 'masks': 2, 'fur': 2, 'flowers': 2, 'cricket': 1, 'conscience': 3, 'prove': 4, 'worthy': 2, 'awaited': 2, 'within': 6, 'programs': 2, 'training': 8, '200': 2, 'puppies': 3, 'experiencing': 2, 'cheats': 2, 'cheerleader': 1, 'glimpse': 1, '1300': 1, 'necronomicon': 2, 'falsely': 3, 'marlin': 4, 'classicmovie': 1, 'impeccable': 1, 'capabilities': 1, 'craft': 3, 'promote': 1, 'contemporaries': 1, 'cocaine': 10, 'poking': 1, 'becca': 1, 'torn': 4, 'dramas': 1, 'prom': 2, 'sending': 1, 'waits': 1, 'mcconaughey': 6, 'democracy': 2, 'calls': 3, 'radio': 6, 'vents': 1, 'walks': 3, 'shoots': 
2, 'servants': 2, 'putting': 1, 'beyond': 3, 'native': 4, 'ravaged': 1, 'technology': 3, 'replicating': 1, 'engaged': 3, 'area': 5, 'pay': 8, 'residence': 5, 'furter': 2, 'salesman': 3, 'worldwide': 1, 'producing': 1, 'aggressive': 2, 'praying': 1, 'mantis': 1, 'persons': 2, 'matter': 2, 'seuss': 10, 'recycles': 1, 'meyers': 1, 'prominent': 1, 'otherwise': 1, 'fantasia': 3, 'teaching': 4, 'skills': 8, 'reluctant': 3, 'otherworldly': 2, 'psychotic': 7, 'confronts': 2, 'tennis': 1, 'complete': 5, 'seems': 9, 'couples': 7, 'leave': 4, 'nightmarish': 2, 'biomechanical': 1, 'sexual': 7, 'unknown': 10, 'origin': 3, 'truckers': 1, 'nostromo': 1, 'surrealist': 1, 'hoping': 4, 'awesome': 5, 'imaginative': 2, 'since': 9, 'harder': 2, 'sergei': 3, 'eisenstein': 2, 'filmmaking': 3, 'sewed': 1, 'skins': 2, 'silly': 7, 'medieval': 4, 'bunny': 3, 'sure': 4, 'hokey': 1, 'cusack': 9, 'coddry': 1, 'madeup': 1, 'focusses': 1, 'feline': 4, 'mains': 1, 'rom': 2, 'com': 2, 'common': 13, 'tosses': 1, 'shield': 2, 'suspenseful': 2, 'frightening': 2, 'delighting': 1, 'generations': 1, 'eliminated': 1, 'thirty': 1, 'encounters': 6, 'proprietor': 1, 'domination': 2, 'witty': 2, 'defies': 2, 'norms': 2, 'farmboy': 1, 'cowhand': 1, 'talks': 9, 'trail': 8, 'hiring': 3, 'invents': 1, 'step': 4, 'hooks': 2, 'definitely': 1, 'profound': 3, 'destroying': 2, 'erich': 4, 'maria': 6, 'remarque': 3, 'witchcraft': 1, 'wizardry': 2, 'mandatory': 1, 'fitting': 1, 'supports': 1, 'courier': 1, 'service': 3, 'completely': 6, 'fit': 1, 'felt': 2, 'spinster': 1, 'problem': 12, 'accept': 2, 'witnessing': 8, 'meaningful': 1, 'reason': 2, 'statue': 3, 'bazooko': 1, 'yard': 3, 'reconnect': 1, 'proves': 1, 'surprise': 3, 'phillip': 7, 'why': 8, 'syrena': 1, 'chalices': 1, 'sparrow': 10, 'threats': 3, 'unlisted': 2, 'miami': 7, 'florida': 6, 'quickly': 1, 'establishes': 3, 'weary': 2, 'gunfighter': 4, 'homestead': 1, 'smoldering': 1, 'settler': 1, 'rancher': 1, 'guardians': 2, 'flic': 1, 'combines': 3, 'explosive': 
2, 'underwent': 1, 'grueling': 1, 'except': 2, 'spared': 1, 'resent': 1, 'convey': 1, 'resentment': 1, 'idolized': 1, 'bynes': 1, 'announcing': 1, 'returned': 4, 'unite': 3, 'threatening': 3, 'strangely': 1, 'idealized': 1, 'frustrating': 2, 'northern': 1, 'overtaken': 1, 'feathered': 1, 'employeer': 1, 'knife': 2, 'stabbing': 1, 'animate': 1, 'specifically': 2, 'magically': 5, 'whenever': 3, 'packs': 2, 'groups': 5, 'arkansas': 2, 'housewife': 8, 'whisked': 3, 'barnabas': 4, 'ancestral': 1, 'dysfunctional': 5, 'descendants': 1, 'tops': 1, 'lists': 1, 'rep': 2, 'fraud': 1, 'arouses': 1, 'suspicions': 1, 'should': 3, 'racing': 7, 'orwellian': 1, 'pigs': 4, 'ran': 4, 'establishment': 2, 'insides': 1, 'skip': 1, 'heder': 4, 'alienated': 2, 'presidency': 2, 'mcclain': 3, 'fought': 7, 'terrorist': 7, 'caring': 3, 'miniature': 1, 'twenty': 5, 'wonders': 2, 'kendrick': 6, 'ladies': 3, 'cappella': 2, 'competition': 11, 'annasophia': 1, 'robb': 1, 'sleeve': 1, 'occur': 1, 'digitally': 1, 'removed': 1, 'directer': 1, 'alcohol': 3, 'antonio': 6, 'banderas': 4, 'furry': 4, 'happened': 7, 'implanted': 3, 'executes': 2, 'sinister': 3, 'sydney': 4, 'pollack': 1, 'views': 4, 'madeleine': 2, 'kahn': 2, 'bogdanovich': 1, 'hollywoods': 1, 'biggest': 4, 'flops': 1, 'beneath': 1, 'boulder': 2, 'faced': 4, 'decision': 5, 'titled': 4, 'hg': 1, 'wells': 6, 'martians': 2, 'matheson': 2, 'brashear': 1, 'trained': 6, 'acotr': 1, 'walsh': 1, 'bondsman': 1, 'moscone': 1, 'accountant': 1, 'jonathan': 10, 'mardukas': 1, 'grodin': 3, 'freemon': 1, 'sinful': 2, 'bradely': 1, 'allows': 4, 'allowing': 1, 'brain': 5, 'capacity': 2, 'breakthrough': 3, 'tracy': 7, 'rare': 4, 'card': 4, 'volkswagen': 1, 'beetle': 1, 'revised': 1, 'chevrolet': 1, 'camaro': 1, 'assailant': 2, 'reopen': 2, 'site': 4, 'drowning': 1, 'sanctuary': 2, 'regarding': 3, 'disorder': 2, 'forgotten': 1, 'chaos': 4, 'harassed': 1, 'locals': 2, 'leaving': 2, 'ensues': 5, 'deceased': 4, 'oz': 3, 'mocked': 1, 'stoltz': 1, 'excels': 2, 
'despite': 8, 'facial': 1, 'deformity': 1, 'upside': 5, 'columbus': 2, 'chuckie': 3, 'dwarfs': 1, 'swordsmen': 2, 'sworn': 1, 'rapacious': 1, 'claudette': 5, 'colbert': 5, 'flashes': 1, 'hitchike': 1, 'heiress': 3, 'flags': 1, 'protege': 2, 'ailing': 1, 'recruiting': 2, 'handicapped': 1, 'galifiniakis': 1, 'crom': 1, 'driven': 7, 'thirst': 1, 'brutal': 5, 'slaughter': 1, 'gothic': 2, 'destined': 2, 'lynch': 5, 'lawnmower': 2, 'dax': 2, 'shepard': 2, 'june': 2, 'collects': 1, 'reaching': 2, 'waterboarded': 1, 'torture': 6, 'seconds': 1, 'per': 2, 'freaky': 2, 'grew': 2, 'starrin': 1, 'unfortunate': 2, 'arc': 1, 'praised': 1, 'posters': 1, 'immortal': 1, 'forever': 3, 'grim': 3, 'reaper': 1, 'obi': 2, 'wan': 2, 'kenobi': 2, 'tatooine': 1, 'meme': 1, 'dur': 1, 'mistakenly': 2, 'germans': 1, 'synonymous': 1, 'gale': 2, 'month': 2, 'georgine': 1, 'darcy': 2, 'torso': 1, 'relaxing': 2, 'marrying': 4, 'discrete': 1, 'months': 2, 'seduce': 2, 'mrs': 6, 'increasingly': 2, 'harassment': 1, 'debt': 1, 'fulfilling': 1, 'fantasies': 1, 'renegade': 2, 'elia': 4, 'kazan': 4, 'inge': 2, 'sultry': 1, 'scarlett': 11, 'none': 3, 'naomi': 1, 'watts': 1, 'orange': 6, 'indigo': 1, 'salieri': 2, 'nicolson': 2, 'nominations': 4, 'strongman': 1, 'bradbury': 3, 'architect': 2, 'infiltrates': 2, 'implants': 1, 'ideas': 4, 'sleeping': 2, 'subconscious': 1, 'ranked': 1, 'wohlberg': 1, 'attempting': 11, 'mini': 1, 'coopers': 1, 'website': 2, 'creator': 4, 'cunning': 1, 'charismatic': 4, 'drinks': 2, 'bourbon': 1, 'succeeding': 1, 'seventh': 1, 'christmastime': 1, 'slashers': 1, 'rack': 1, 'count': 3, 'globe': 6, 'frat': 1, 'reading': 3, 'conrad': 1, 'darkness': 4, 'loud': 2, 'keeper': 1, 'model': 1, 'silver': 3, 'nathan': 8, 'lane': 13, 'produce': 2, 'flop': 7, 'quote': 16, 'gena': 1, 'rowlands': 1, 'cassvetes': 1, 'ad': 2, 'libbed': 1, 'performs': 1, 'danced': 1, 'gershwin': 2, 'vicente': 1, 'fang': 1, 'goldfish': 2, 'unforgiving': 1, '45': 1, 'birthdays': 1, 'atticus': 3, 'finch': 5, 
'penned': 1, 'hailed': 7, 'pays': 1, 'homage': 2, 'brawny': 1, 'colonel': 2, 'woodrow': 1, 'dolarhyde': 1, 'watched': 4, 'strippers': 4, 'heard': 4, 'posthumous': 1, 'cheating': 4, 'joad': 1, 'pursued': 4, 'minded': 1, 'bronson': 3, 'sinking': 6, 'liar': 2, 'priceless': 2, 'statuette': 1, 'situations': 4, 'berrymore': 1, 'golfing': 1, 'joke': 1, 'raved': 1, 'anymore': 4, 'hilariously': 4, 'burial': 1, 'nathalie': 1, 'benefits': 3, 'rode': 1, 'feared': 2, 'surfers': 1, 'wave': 3, 'kenji': 1, 'mizoguchi': 1, 'concubine': 1, 'stigma': 1, 'sold': 5, 'labor': 2, 'rightful': 1, 'acrophobia': 1, 'interaction': 1, 'laid': 2, 'talent': 3, 'sign': 2, 'packed': 14, 'stepmother': 4, 'aluminum': 1, 'diaz': 15, 'elementary': 2, 'simultaneously': 1, 'inmates': 7, 'banker': 7, 'raspy': 1, 'rogan': 5, 'yearns': 1, 'teach': 6, 'cubs': 2, 'wherein': 1, 'gnomes': 4, 'pretended': 1, 'suicide': 6, 'stared': 7, 'hamill': 2, 'schools': 3, 'confused': 2, 'jaded': 1, 'partake': 1, 'brawls': 1, 'napoleon': 2, 'lizard': 9, 'ralphie': 3, 'teachers': 2, 'types': 1, 'items': 4, 'mohr': 1, 'stalks': 3, 'removes': 1, 'clothing': 2, 'spunky': 1, 'hamster': 1, 'realize': 2, 'tinman': 1, 'lover': 10, 'searing': 1, 'heat': 2, 'pets': 5, 'ranch': 1, 'woodland': 5, 'royal': 5, 'sketch': 2, 'arthurian': 2, 'johansson': 5, 'entertaining': 1, 'dealer': 6, 'jung': 4, 'countries': 2, 'truly': 1, 'clashing': 1, 'ballerinas': 2, 'canyoneering': 1, 'tragically': 3, 'arguably': 2, 'performing': 5, 'muggles': 1, 'factions': 1, 'differences': 1, 'forward': 1, 'broke': 3, 'cera': 4, 'lloyd': 3, 'invent': 1, 'device': 2, 'elevator': 3, 'victorious': 1, '1865': 1, 'nicknamed': 4, 'chief': 4, 'broom': 2, 'roommates': 2, 'variety': 5, 'dreaming': 1, 'terrorize': 5, 'wiggles': 1, 'rehabilitative': 1, 'quintin': 2, 'tarintino': 1, 'greusome': 1, 'cuts': 3, 'pours': 1, 'engagement': 2, 'lasts': 1, 'pierce': 6, 'brosnan': 3, 'ewen': 1, 'mcgregor': 6, 'minister': 9, 'fullest': 2, 'physician': 1, 'fallen': 5, 'ranger': 1, 
'bound': 3, 'cleaning': 5, 'therefore': 2, 'examples': 1, 'eradicate': 1, 'lycan': 1, 'uninhabited': 1, 'responsibility': 2, 'buscemi': 3, 'experiment': 6, 'robe': 3, 'dragons': 5, 'vikings': 1, 'marijuana': 1, 'blake': 8, 'lively': 6, 'label': 1, 'intern': 7, 'labels': 1, 'reunion': 2, 'individuals': 3, 'sidekicks': 1, 'traits': 1, 'cripple': 1, 'joss': 5, 'whedon': 5, 'dudley': 5, 'hayley': 2, 'message': 4, 'phoenix': 6, 'hustlers': 4, 'aunts': 1, 'insane': 10, 'destroys': 4, 'mordor': 3, 'ridden': 4, 'regular': 4, 'weatherman': 1, 'hunky': 1, 'jerry': 7, 'mcguire': 2, 'insecurity': 1, 'freeing': 1, 'risks': 3, 'proportions': 1, 'disowns': 1, 'noticeable': 1, 'pass': 2, 'framed': 2, 'attracted': 3, 'belushi': 3, 'toga': 2, 'parties': 1, 'el': 1, 'mariachi': 1, 'bandaras': 1, 'salma': 3, 'hayek': 3, 'mcdowell': 8, 'hosts': 1, 'reproduce': 1, 'pre': 5, 'subservient': 1, 'orangutans': 1, 'chimpanzees': 2, 'pierre': 4, 'boulle': 1, 'humankind': 2, 'catastrophic': 1, 'malevolent': 1, 'michelle': 10, 'pfeiffer': 4, 'journalists': 2, 'annette': 4, 'benning': 3, 'thora': 1, 'birch': 1, 'brenner': 1, 'sidekick': 11, 'wager': 1, 'believes': 6, 'commoner': 2, 'flighty': 1, 'brightens': 1, 'emerge': 1, 'positive': 1, 'strapped': 5, 'fairly': 1, 'bean': 1, 'limited': 3, 'universal': 4, 'afflicted': 1, 'lupine': 1, 'mildly': 1, 'retarded': 1, 'exploits': 4, 'dozens': 1, 'suzanne': 2, 'befriend': 5, 'miyaggi': 1, 'teaches': 10, 'blow': 2, 'hillary': 3, 'ruth': 2, 'citizen': 1, 'savant': 1, 'hershey': 3, 'urinate': 1, 'result': 2, 'hang': 2, 'ups': 4, 'downs': 3, 'wind': 3, 'skyscraper': 2, 'classics': 1, 'poem': 3, 'allan': 4, 'blown': 1, 'encounter': 9, 'amphibian': 2, 'frost': 4, 'dominant': 1, 'expires': 1, 'purchased': 3, 'beforehand': 1, 'hostile': 2, 'extra': 5, 'terrestrial': 4, 'stalk': 3, 'granny': 1, 'housing': 2, 'cfo': 1, 'ponzi': 3, 'crossdresses': 1, 'identities': 2, 'melanie': 2, 'griffith': 2, 'sells': 3, 'abducted': 1, 'molested': 1, 'pedophile': 1, 'sleeper': 
2, 'visits': 2, 'reinvent': 1, 'purchasing': 1, 'villa': 1, 'killers': 4, 'inevitable': 1, 'defoe': 1, 'stationed': 1, 'forster': 2, 'gaiman': 1, 'viola': 1, 'historically': 1, 'luxury': 4, 'liner': 5, 'reduced': 2, 'miracle': 1, 'jesus': 1, 'cleaver': 1, 'inspector': 2, 'juvenile': 1, 'punks': 1, 'displays': 1, 'rather': 7, 'cecile': 2, 'b': 8, 'demille': 3, 'burrough': 1, 'undying': 1, 'loving': 2, 'sadistic': 2, 'temper': 4, 'poitier': 1, 'originated': 1, 'virgil': 2, 'tibbs': 1, 'sperm': 6, 'donor': 5, 'actively': 1, 'blossoms': 1, 'infamously': 1, 'passenger': 4, 'witnessed': 1, 'jane': 9, 'newlyweds': 1, 'valjean': 1, 'ntozake': 2, 'shange': 2, 'feminist': 1, 'phylicia': 1, 'rashad': 1, 'janet': 10, 'wildness': 1, 'mate': 4, 'mary': 10, 'connor': 11, 'guillermo': 8, 'faun': 3, 'exorcisms': 3, 'exorcism': 4, 'renee': 3, 'harmless': 1, 'multi': 2, 'talented': 5, 'effective': 1, 'purportedly': 1, 'parker': 9, 'interrupted': 1, 'derailment': 1, 'errors': 1, 'online': 2, 'anythig': 1, 'particualr': 1, 'sorry': 1, 'eh': 1, 'anybody': 1, 'stupid': 2, 'negative': 1, 'nancy': 2, 'sit': 2, 'watching': 5, 'jump': 3, 'fear': 6, 'fillini': 1, 'ruins': 2, 'quinten': 3, 'terentino': 1, 'delfonics': 1, 'motorcycles': 1, 'liked': 2, 'asunder': 1, 'mom': 12, 'suggestions': 1, 'plant': 4, 'parodied': 2, 'usually': 2, 'guess': 2, 'mann': 5, 'stresses': 1, 'hitting': 1, 'milestone': 2, 'identify': 1, 'receive': 2, 'shia': 7, 'lebouf': 2, 'swarzenegger': 1, 'menace': 2, 'warheads': 1, 'bed': 1, 'depends': 2, 'timeless': 9, 'include': 5, 'imagine': 2, 'bloody': 3, 'misnomer': 1, '24': 2, 'enlists': 8, 'mutants': 5, 'superhuman': 6, 'malicious': 1, 'iii': 2, 'security': 2, 'parkinson': 6, 'sena': 2, 'dystopic': 1, 'detroit': 2, 'terminally': 2, 'wounded': 3, 'submerged': 1, 'haunting': 3, 'slang': 1, 'term': 1, 'squeeze': 1, 'yoda': 2, 'rescues': 2, 'perhaps': 4, 'staff': 2, 'interviewed': 1, 'tuskegee': 3, 'airmen': 1, 'access': 1, 'logbooks': 1, 'siam': 3, 'esteem': 1, 
'inaccuracies': 2, 'perceived': 2, 'disrespect': 2, 'monarchy': 2, 'muppets': 1, 'needed': 2, 'closing': 1, 'muppet': 1, 'tobacco': 3, 'smoke': 1, 'pumped': 1, 'tube': 1, 'slid': 1, 'pant': 1, 'leg': 5, 'shirt': 1, 'dinklage': 1, 'achondroplasic': 1, 'auditioned': 1, 'washes': 1, 'ashore': 1, 'attitudes': 2, 'seal': 1, 'rocks': 2, 'neptune': 1, 'islands': 1, 'landis': 3, 'tourists': 5, 'admit': 1, 'exists': 1, 'instant': 1, 'straight': 3, 'enforcer': 4, 'clans': 1, 'tradition': 5, 'slaying': 1, 'adder': 1, 'mcinnerny': 1, 'reported': 1, 'accompanies': 1, 'haddock': 2, 'tenessee': 1, 'stella': 1, 'broken': 3, 'anxiety': 1, 'prepares': 1, 'hospitalized': 2, 'disability': 1, 'scuba': 1, 'diver': 1, 'concoct': 1, 'headed': 7, 'fuehrer': 1, 'enraged': 1, 'rant': 2, 'memes': 1, 'fail': 1, 'smash': 7, 'ariel': 1, 'dweller': 1, 'finale': 4, 'reaches': 2, 'billionaire': 4, 'storm': 2, 'knocks': 1, 'atop': 1, 'veers': 1, 'admiral': 1, 'ozzel': 1, 'incompetence': 1, 'loner': 2, 'stepmom': 1, 'infested': 2, 'foe': 4, 'busy': 3, 'crate': 1, 'adorable': 4, 'succumbing': 2, 'greed': 4, 'gay': 9, 'addict': 2, 'suffering': 4, 'professionals': 2, 'record': 10, 'investigation': 2, 'suspicious': 1, 'package': 4, 'irish': 6, 'sharing': 2, 'lifestyles': 3, 'apparently': 3, 'closes': 1, 'interact': 5, 'repent': 1, 'riotous': 1, 'critter': 1, 'golfers': 1, 'programed': 1, 'detest': 1, 'plastique': 1, 'item': 2, 'larry': 5, 'cable': 3, 'tow': 1, 'spinning': 6, 'cogs': 1, 'cope': 4, 'possible': 5, 'dwarves': 3, 'plunder': 1, 'eras': 1, 'tribute': 2, 'jj': 1, 'aprams': 1, 'presents': 1, 'schoolkids': 1, 'sophisticated': 2, 'mcmurphy': 2, 'voluntarily': 1, 'lesley': 1, 'request': 1, 'unleash': 1, 'revealed': 3, 'mcdonald': 1, 'mac': 1, 'rodridguez': 1, 'stahl': 1, 'bastard': 1, 'refer': 1, 'dastan': 1, 'safeguard': 1, 'dagger': 4, 'hippy': 2, 'fbi': 5, 'cool': 1, 'amy': 13, 'adams': 15, 'pfieffer': 1, 'recreate': 1, 'zookeeper': 1, 'occupying': 1, 'lifeboat': 1, 'bengal': 1, 'yippy': 1, 'ki': 
3, 'ya': 2, 'expletive': 1, 'resort': 4, 'switching': 2, 'transferred': 1, 'canine': 1, 'millions': 3, 'slimer': 1, 'halls': 1, 'prestigious': 2, 'momsen': 1, 'cindy': 1, 'lou': 2, 'homicidal': 2, 'xd': 1, 'electricity': 1, 'utter': 2, 'knightly': 1, 'jordan': 4, 'parallax': 1, 'threatened': 2, 'newcomer': 2, 'fabulous': 1, 'taste': 1, 'corral': 1, 'purebreds': 1, 'harness': 1, 'storybook': 1, 'liberty': 1, 'tongue': 1, 'pirates': 2, 'birds': 6, 'genres': 4, 'collide': 1, 'regain': 4, 'tactics': 1, 'lips': 3, 'miraculous': 2, 'landing': 2, 'marlowe': 1, 'blackmail': 1, 'sleeps': 1, 'credit': 4, 'foil': 1, 'thwart': 1, 'antagonist': 5, 'rodents': 3, 'size': 2, 'brute': 1, 'norway': 1, 'rothstein': 1, 'chucky': 1, 'entity': 4, 'satan': 2, 'mega': 2, 'desire': 3, 'mcfarlane': 3, 'fredo': 1, 'lay': 1, 'gyllenhall': 2, 'amount': 1, 'worthington': 6, 'kraken': 2, 'diagnosed': 3, 'stalone': 1, 'write': 4, 'insists': 1, 'crying': 1, 'encourage': 1, 'veterans': 4, 'irreparably': 1, 'roofer': 1, 'caused': 7, 'potions': 2, 'headmaster': 1, 'maguical': 1, 'conspire': 2, 'pension': 2, 'fund': 2, 'doldrums': 1, 'tiny': 11, 'keyhole': 1, 'rocky': 4, 'decker': 6, 'sing': 2, 'rapes': 1, 'primates': 4, 'caution': 1, 'fasten': 1, 'seatbelts': 1, 'bumpy': 1, 'clarice': 3, 'lecter': 3, 'instantly': 1, 'matchsticks': 1, 'repeatedly': 2, 'announce': 1, 'excellent': 2, 'autistic': 4, 'xavier': 1, 'recruit': 2, 'cyclops': 1, 'iceman': 1, 'thrown': 3, 'groupie': 1, 'carolina': 2, 'propels': 1, 'mainly': 2, 'plain': 1, 'whisper': 1, 'rosebud': 3, 'plagued': 4, 'insanity': 3, 'toxin': 2, 'contaminated': 6, 'gylenhaal': 3, 'injured': 3, 'reporters': 1, 'decipher': 1, 'raunchy': 3, 'geeks': 4, 'singapore': 1, 'refuge': 2, 'lame': 1, 'binoculars': 1, 'chapman': 3, 'britons': 3, 'grateful': 1, 'pod': 2, 'doors': 2, 'transvestite': 3, 'transsylvania': 1, 'decapitated': 1, 'preview': 2, 'suffer': 2, 'cloned': 2, 'exhibits': 1, 'amok': 1, 'bridesmaid': 1, 'threaten': 2, 'upend': 1, 'pastry': 1, 
'chef': 2, 'claustrophobic': 1, 'boredom': 1, 'filth': 1, 'sheer': 1, 'wings': 4, 'onstage': 1, 'random': 4, 'shopping': 2, 'surrounded': 2, 'annoying': 2, 'rehab': 1, 'alcoholism': 1, 'surfer': 3, 'obstacles': 3, 'losing': 4, 'mathematical': 1, 'janitor': 2, 'protaganist': 1, 'protech': 1, 'malfunction': 1, 'jacuzzi': 1, 'portrayle': 1, 'frances': 6, 'megatron': 1, 'starscream': 1, 'deceptions': 1, 'sofia': 5, 'johansen': 1, '23': 3, 'lorraine': 1, 'insisted': 2, 'studio': 7, 'drop': 1, 'upper': 4, 'picks': 3, 'rigs': 1, 'squalid': 1, 'succession': 1, 'bars': 2, 'motels': 1, 'pumpkin': 4, 'sitting': 1, 'knocking': 1, 'pittsburgh': 2, 'iran': 6, 'amidst': 2, 'crumbling': 1, 'cagney': 6, 'hoodlum': 2, 'ranks': 2, 'underworld': 10, 'cohan': 2, 'warner': 3, 'bros': 4, 'wonder': 2, 'mastrantonio': 1, 'easy': 5, 'trains': 2, 'rebelled': 1, 'donna': 3, 'reed': 4, 'elwood': 1, 'dowd': 1, 'lindbergh': 2, 'creepiest': 1, 'gaynor': 1, 'brien': 4, '1927': 4, 'ghibli': 7, 'relentless': 1, 'gabin': 1, 'stroheim': 1, 'lundegaard': 1, 'henchmen': 3, 'bungling': 1, 'persistent': 2, 'marge': 2, 'gunderson': 1, 'ingenuity': 1, 'bravery': 1, 'aviation': 1, 'jodelle': 1, 'ferland': 1, 'roasted': 1, 'bonfire': 1, 'roast': 1, 'oven': 1, 'iris': 1, 'merrick': 1, 'sympathetic': 1, 'lautner': 6, 'hatter': 3, 'johny': 1, 'lends': 1, 'bought': 1, 'spans': 2, 'sales': 3, 'fozzy': 1, 'lieutenant': 2, 'outposts': 1, 'exist': 5, 'bullies': 2, 'reappear': 1, 'impending': 1, 'disagrees': 1, 'domestic': 1, 'nearly': 4, 'decaprio': 2, 'peoples': 2, 'thoughts': 1, 'zeta': 8, 'tormented': 2, 'studying': 1, 'persecution': 2, 'goof': 1, 'screened': 2, 'bbc': 3, 'april': 1, 'root': 1, 'theatrically': 1, 'marines': 3, 'pendleton': 2, 'educating': 1, 'marine': 12, 'thank': 1, 'sneak': 1, 'march': 2, 'teddy': 13, 'kurtz': 1, 'buck': 2, 'watson': 4, 'misfit': 3, 'apocalyotic': 1, '2020': 1, 'glen': 2, 'homicide': 3, 'melodramatic': 1, 'let': 7, 'allegedly': 2, '15': 2, 'pounds': 1, 'fassbender': 7, 'cannes': 
2, 'festival': 2, 'generation': 1, 'penelope': 4, 'cruz': 4, '30': 8, 'artificially': 1, 'inseminated': 1, 'beau': 1, 'involve': 3, 'micheal': 5, 'towns': 1, 'goods': 2, 'entrepreneurs': 2, 'wide': 1, 'ne': 1, 'yo': 1, 'rival': 13, 'skeleton': 3, 'software': 2, 'writers': 2, 'account': 3, 'clicking': 1, 'heels': 3, 'pal': 2, 'transport': 2, 'hazardous': 1, 'materials': 2, 'proper': 1, 'handling': 2, 'turban': 1, 'ashby': 1, 'paramount': 2, 'interesting': 5, 'lineman': 1, 'repairman': 1, 'thousand': 2, 'polish': 6, 'refugees': 2, 'holocost': 2, 'surface': 2, 'lurking': 1, 'bedroom': 1, 'closets': 1, 'specific': 3, 'staham': 1, 'steak': 1, 'lunch': 2, 'chic': 1, 'gowns': 1, 'poisoning': 4, 'strikes': 4, 'unladylike': 1, 'behavior': 1, 'scouting': 1, 'assassins': 6, 'await': 1, 'terroist': 1, 'voicing': 4, 'wrangling': 1, 'chauffeur': 1, 'makaws': 2, 'taker': 1, 'childs': 1, 'pillow': 1, 'unbelievable': 1, 'boots': 2, 'sassy': 1, 'mccully': 1, 'culcan': 1, 'rowlings': 3, 'trash': 4, 'organizing': 1, 'louie': 3, 'caped': 1, 'crusader': 2, 'faithful': 1, 'caine': 3, 'comer': 1, 'carriage': 2, 'slipper': 5, 'godmother': 1, 'happily': 2, '1863': 2, 'caan': 1, 'toll': 2, 'booth': 1, 'breaks': 8, 'retrain': 1, 'nemesis': 2, 'guitar': 1, 'sherif': 1, 'reboot': 7, 'climb': 1, 'walls': 4, 'webs': 1, 'villan': 1, 'insert': 1, 'wished': 1, 'ein': 1, 'joker': 3, 'steamy': 1, 'kathleen': 4, 'turner': 5, 'sources': 1, 'ernest': 3, 'borgnine': 1, 'prey': 1, 'albert': 3, 'cazale': 1, 'klaus': 1, 'kinski': 1, 'guinness': 2, 'merchant': 1, 'ivory': 1, 'pkd': 1, 'news': 5, 'paddled': 1, 'canoe': 2, 'prior': 2, 'ironic': 2, 'cinema': 3, 'bankruptcy': 1, 'jokingly': 2, 'osca': 1, 'drink': 3, 'nicole': 3, 'kidman': 3, 'satine': 1, 'baz': 2, 'luhrmann': 1, 'nigel': 2, 'camelot': 2, 'cherie': 1, 'lunghi': 1, 'guenevere': 1, 'refereed': 1, 'cdc': 1, 'okay': 1, 'wartime': 1, 'overwhelmed': 1, 'cousins': 1, 'villains': 2, 'splash': 1, 'transition': 3, 'fritz': 3, 'lang': 5, 'silverware': 1, 
'hasbro': 2, 'sixteenth': 1, 'rewrites': 1, 'gambling': 3, 'scam': 2, 'idealist': 3, 'barges': 2, 'realm': 4, 'copy': 1, 'ares': 1, 'hades': 5, 'lorre': 5, 'libbing': 1, 'annoyance': 1, 'ostrum': 1, 'bucket': 2, 'weller': 3, 'reassembled': 1, 'indestructible': 1, 'verhoeven': 2, 'photographer': 4, 'kincaid': 2, 'wanders': 4, 'francesca': 1, 'cliint': 1, 'please': 2, 'dubbed': 1, 'racecar': 1, 'campaign': 4, 'succumbs': 1, 'lust': 3, 'ermey': 2, 'personally': 4, 'supervised': 2, 'recreation': 1, 'parris': 1, 'upstart': 1, 'producer': 7, 'accepts': 5, 'challenge': 2, 'reviving': 1, 'daytime': 1, 'boom': 3, 'mancini': 1, 'remind': 1, 'academic': 1, 'probation': 1, 'preparatory': 1, 'fielding': 1, 'rex': 6, 'eliza': 1, 'doolittle': 1, 'anouilh': 1, 'faux': 1, 'banging': 1, 'shearer': 1, 'mckean': 1, 'derek': 3, 'jnr': 1, 'moriaty': 1, 'clothes': 1, 'natural': 1, 'lincoln': 4, 'probably': 1, 'notable': 1, 'serling': 1, 'memoirs': 4, 'jeopardy': 1, 'streets': 7, 'bernstein': 1, 'livingston': 4, 'lampoons': 1, 'wise': 7, 'cracking': 2, 'accolades': 1, 'math': 2, 'hoard': 1, 'sell': 1, 'scandalous': 1, 'arena': 3, 'london': 6, 'cleef': 1, 'eli': 3, 'wallach': 2, 'mcgovern': 1, 'turturro': 3, 'sherwood': 1, 'surrounds': 1, 'merry': 2, 'cabrini': 1, 'projects': 1, 'traverse': 1, 'creators': 3, 'overcoming': 2, 'adversity': 2, 'cameo': 1, 'eleven': 3, 'belongs': 2, 'woke': 1, 'explore': 1, 'stampede': 1, 'sissy': 3, 'spacek': 3, 'spencer': 6, 'lawyers': 2, 'opposing': 2, 'imagery': 1, 'environment': 4, 'monoliths': 1, 'poked': 1, 'comedians': 2, 'preparations': 1, 'display': 1, '1938': 6, 'carrel': 5, 'fanning': 2, 'lukewarm': 1, 'reviews': 2, 'hersheys': 1, 'strongly': 1, 'affinity': 1, 'awful': 2, 'eclectic': 1, 'hays': 1, 'jabbar': 1, 'staute': 1, 'thrills': 6, 'charley': 2, 'brewster': 2, 'guesses': 1, 'dandrige': 1, 'string': 2, 'profits': 1, 'morph': 1, 'miner': 1, 'extraterrestrial': 5, 'passed': 3, 'sidney': 5, 'prescott': 2, 'thanks': 2, 'visited': 9, 'painless': 1, 
'coincide': 2, 'choices': 2, 'accidental': 3, 'mix': 4, 'identical': 3, 'plaid': 1, 'overnight': 2, 'bags': 2, 'journal': 3, 'milne': 1, 'hundred': 1, 'acre': 1, '16': 6, 'slayer': 1, 'monstrosities': 1, 'organized': 3, 'dynasty': 2, 'transfers': 2, 'clandestine': 1, 'aptly': 1, 'realizing': 1, 'faked': 1, 'victory': 3, 'magazine': 1, 'columnist': 1, 'gladys': 2, 'taber': 1, 'stillmeadow': 1, 'charlestown': 1, 'chiefs': 1, 'coached': 2, 'reggie': 2, 'dunlop': 1, 'kubreck': 1, 'clock': 1, 'borrowing': 1, 'malcom': 1, 'commits': 2, 'doorknob': 1, 'comback': 1, 'speaks': 5, 'yiddish': 1, 'blacks': 1, 'deceiving': 1, 'economist': 1, 'delusions': 2, 'detailed': 1, 'diner': 3, 'lykans': 1, 'haired': 2, 'stumbled': 1, 'mayor': 1, 'sleigh': 2, 'wonderland': 2, 'vs': 3, 'document': 2, 'bike': 5, 'zuckerberg': 4, 'dorm': 3, 'boyfriend': 6, 'imagines': 1, 'whether': 1, 'penis': 1, 'kissing': 1, 'staying': 2, 'carried': 2, 'peacock': 3, 'conquering': 2, 'prejudice': 1, 'jurors': 1, 'slowly': 4, 'trial': 3, 'leap': 2, 'guard': 6, 'arriving': 1, 'forman': 3, 'enough': 3, 'greatly': 1, 'punching': 1, 'hammil': 1, 'continue': 1, 'sweeping': 1, 'unlinked': 1, 'fend': 2, 'oversees': 1, 'haunts': 2, 'hot': 6, 'inspiration': 3, 'prolific': 2, 'stupidest': 1, 'commercials': 1, 'rapeable': 1, 'orlando': 2, 'bloom': 2, 'viggo': 2, 'mortensen': 2, 'serie': 2, 'umbrellas': 1, 'puddle': 1, 'hopping': 1, 'herb': 1, 'colorful': 7, 'impostor': 1, 'mechanical': 4, 'ruining': 1, 'seals': 5, 'labeouf': 3, 'curtain': 1, 'voted': 5, 'premiere': 1, 'dolittle': 1, 'raid': 1, 'badges': 6, 'ai': 2, 'stinking': 1, '36': 1, 'consecutive': 1, 'pidgeon': 1, 'tempest': 1, 'laurents': 1, 'gangs': 15, 'disillusioned': 1, 'palme': 1, 'palm': 1, '227': 1, 'lennon': 2, 'chrysanthemum': 1, 'opened': 3, 'franklin': 1, 'schaffner': 1, 'bleak': 1, 'tis': 1, 'scratch': 1, 'galahad': 1, 'lancelot': 4, 'bedevere': 1, 'warlord': 4, 'montand': 1, 'marcel': 1, 'pagnol': 1, 'collaborations': 1, 'starling': 1, 'lector': 3, 
'credits': 1, 'offering': 1, 'launched': 7, 'depiction': 2, 'harsh': 3, 'realities': 3, 'dueling': 2, 'hooded': 2, 'jonze': 1, 'averts': 1, 'jazz': 3, 'latifa': 1, 'nabbing': 1, 'aileen': 2, 'wuornos': 1, 'daytona': 1, 'crossover': 1, 'rumored': 1, 'published': 5, 'chronological': 1, 'poverty': 1, 'tsa': 1, 'employee': 7, 'somehow': 1, 'contemporary': 2, 'vik': 2, 'muniz': 2, 'boost': 1, 'malkovich': 5, 'timothy': 4, 'olyphant': 2, 'iowan': 1, 'violently': 1, 'levi': 2, 'labeauf': 1, 'portis': 2, 'disasters': 1, 'greece': 1, 'remaining': 1, 'maids': 3, 'pegg': 2, 'gabriele': 2, 'muccino': 2, 'coaching': 1, 'rap': 1, 'wu': 2, 'tang': 1, 'clan': 2, '11': 6, 'reunites': 1, 'worked': 2, 'beetlejuice': 1, 'karl': 2, 'reconstruct': 1, 'duo': 2, 'vow': 1, 'philandering': 2, 'ufc': 1, 'wookies': 1, 'tan': 2, 'cels': 1, 'misses': 3, 'replica': 1, 'departure': 1, 'orphaned': 5, 'kobe': 1, 'bombing': 1, 'enforcement': 2, 'boundaries': 1, 'troublesome': 1, 'gehrig': 1, 'stoically': 1, 'finishes': 1, 'namesake': 1, 'visual': 2, 'afterlife': 2, 'mathmetician': 1, 'adjusted': 1, 'inflation': 1, 'siegel': 1, 'abandons': 3, 'blatty': 2, 'entering': 4, 'flailing': 1, 'claus': 7, 'deemed': 2, 'arguing': 1, 'indeed': 1, 'motley': 1, 'active': 2, 'discontentment': 1, 'opens': 2, 'shattering': 1, 'idealistic': 1, 'utilizing': 1, 'parliamentary': 1, 'fued': 1, 'orders': 3, 'strike': 1, 'soviet': 3, 'avoid': 1, 'apocalypse': 3, 'rodney': 3, 'dangerfield': 3, 'gophers': 1, 'cleavon': 2, 'acceptance': 2, 'wadiya': 1, 'discussion': 1, 'hayden': 2, 'panettiere': 2, 'fargo': 1, 'feels': 3, 'herman': 3, 'detailing': 2, 'whaling': 1, 'vessel': 1, 'respective': 1, 'il': 3, 'buono': 1, 'brutto': 1, 'cattivo': 1, 'orphanage': 1, 'picnic': 1, 'baskets': 1, 'hanna': 1, 'britsh': 1, 'renamed': 1, 'uk': 2, 'traces': 3, 'roots': 2, 'lester': 1, 'ringo': 1, 'starr': 1, 'aspects': 2, 'technically': 1, 'podracing': 1, 'tournament': 5, 'decoy': 1, 'introduction': 1, 'jar': 3, 'binks': 1, 'deliver': 2, 
'packages': 2, 'galifiankis': 2, 'invite': 1, 'losers': 1, 'deployed': 1, 'themepark': 1, 'beggers': 1, 'shakespearian': 2, 'decorations': 1, 'hitchock': 1, 'arthouse': 1, 'wong': 2, 'kar': 2, 'wai': 2, 'spouses': 2, 'clarence': 1, 'faith': 4, 'getchell': 1, 'rutger': 3, 'hauer': 3, 'rhi': 1, 'halmi': 1, 'junior': 1, 'weathers': 1, 'daryl': 4, 'hannah': 4, 'marker': 1, 'stowe': 1, 'plummer': 2, 'morse': 1, 'ants': 2, 'grasshopper': 1, 'satirizing': 1, 'typical': 2, 'unkrich': 1, 'gilroy': 1, 'potter': 2, 'watkins': 1, 'stoller': 1, 'strained': 1, 'continually': 2, 'extended': 2, 'thoroughbred': 1, 'rescued': 2, 'nurtured': 1, 'shipwreck': 1, 'pajamas': 2, 'wear': 4, 'macaw': 4, 'bird': 11, 'blu': 3, 'madly': 1, 'suburbs': 2, 'israeli': 1, 'lewi': 1, 'scarred': 3, 'avenger': 1, 'recruits': 4, 'lack': 1, 'widow': 6, 'obtain': 1, 'element': 2, 'janus': 1, 'glittering': 1, 'cruel': 1, 'michel': 3, 'gondry': 2, 'sharecroppers': 1, 'vivid': 1, 'whovians': 1, 'successfully': 1, 'creative': 2, 'everyday': 2, 'keitel': 5, 'pimp': 2, 'shared': 4, 'wy': 1, 'tone': 1, 'greeting': 2, 'precious': 2, 'toe': 2, 'octopus': 1, 'arms': 1, 'incarnation': 1, 'norse': 4, 'appeal': 1, 'dna': 3, 'wad': 1, 'resin': 1, 'worse': 1, 'biel': 6, 'cairn': 1, 'terrier': 1, 'dropout': 3, 'investment': 4, 'firm': 4, 'legitimate': 4, 'sounds': 3, 'trucks': 1, 'ambush': 1, 'constantly': 4, 'constant': 2, 'hyped': 2, 'bone': 1, 'steller': 1, 'realistically': 1, 'propose': 1, 'outlawed': 2, 'delinquent': 1, 'therapy': 1, 'effort': 5, 'orge': 2, 'lower': 3, 'steamliner': 1, 'hosted': 1, 'celebrates': 1, 'idiotic': 1, 'extraordinarily': 1, 'manipulative': 4, 'roguish': 2, 'reconstruction': 2, 'elemental': 1, '1897': 1, 'optics': 1, 'reverse': 1, 'damage': 1, 'reign': 1, 'israel': 1, 'loss': 3, 'wil': 2, 'fixing': 1, 'automobiles': 2, 'concentration': 4, 'precocious': 2, 'prancing': 1, 'snapping': 1, 'wailing': 1, 'divided': 1, 'album': 3, 'sessions': 1, 'bloomfield': 1, 'kooper': 1, 'stills': 1, 
'educate': 1, 'walberg': 1, 'numbskull': 1, 'hysterical': 2, 'mistakes': 2, 'hitters': 2, 'potential': 1, 'explored': 3, 'research': 3, 'chemist': 1, 'dig': 1, 'parish': 2, 'sites': 1, 'dreyfus': 2, 'mclain': 1, 'aurora': 2, 'havasham': 1, 'describing': 1, 'successes': 1, 'blunders': 1, 'drummer': 1, 'confrontation': 2, 'natives': 2, 'topped': 1, 'avatar': 2, 'phillp': 1, 'jrr': 3, 'thats': 1, 'hanson': 4, 'curious': 2, 'maintain': 1, 'demme': 1, 'previous': 4, 'terrestrials': 1, 'risque': 1, 'rodgers': 1, 'hammerstein': 4, 'stansilaw': 1, 'lem': 1, 'avengers': 2, 'courtney': 1, 'terrors': 1, 'fears': 1, 'accepting': 2, 'throats': 1, 'weddings': 3, 'delroy': 1, 'lindo': 1, 'joint': 2, 'bedford': 2, 'stuyvesant': 1, 'felon': 2, 'fashionable': 1, 'manner': 2, 'aims': 1, 'kubrik': 1, 'defined': 3, 'loan': 2, 'collector': 2, 'whoville': 3, 'glowing': 2, 'user': 1, 'utilize': 1, 'toad': 1, 'smooches': 1, 'newest': 2, 'wildly': 2, 'billed': 2, 'v': 6, 'instinct': 1, 'guide': 4, 'perilous': 1, 'miles': 4, 'backpackers': 1, 'slovak': 1, 'promises': 2, 'hedonistic': 1, 'expectations': 2, 'gentlemen': 2, 'captive': 2, 'nemo': 1, 'narrates': 2, 'tribe': 6, 'unusually': 1, 'sharp': 2, 'celebrate': 4, 'pondering': 1, 'imitate': 1, 'stint': 1, 'stated': 1, 'paradise': 3, 'venezuela': 1, 'helium': 1, 'bikers': 3, 'housekeeping': 1, 'cliff': 1, 'forget': 2, 'phil': 2, 'stu': 4, 'doug': 2, 'stephanie': 5, 'plum': 1, 'assignment': 1, 'brooding': 1, 'vincente': 3, 'romanticized': 1, 'madea': 1, 'wehat': 1, 'gore': 1, 'verbinski': 1, 'peasant': 1, 'bandit': 1, 'tensions': 3, 'hottest': 1, 'jumping': 2, 'pheonix': 1, 'maybe': 2, 'discharged': 1, 'alps': 1, 'energy': 1, 'elizabethan': 1, 'versus': 1, 'weak': 2, 'myself': 1, 'luckiest': 1, 'investigated': 1, 'plains': 1, 'dust': 4, 'bowl': 3, 'bogie': 1, 'bacall': 2, 'hurricane': 2, 'wrapped': 1, 'antagonists': 1, 'addiction': 2, 'forms': 4, 'leisurely': 1, 'loopy': 1, 'shift': 1, 'shape': 4, 'liquid': 1, 'amazon': 2, 'infraction': 1, 
'misfits': 1, 'joining': 2, 'fireman': 1, 'manages': 1, 'stoppard': 1, 'consumed': 2, 'jealousy': 3, 'elaborate': 2, 'trap': 1, 'homer': 1, 'odyssey': 2, 'pennsylvania': 1, 'owns': 3, 'rachmaninoff': 1, 'background': 2, 'visions': 1, 'mesa': 1, 'sw': 1, 'mild': 1, 'mannered': 1, 'cartoons': 2, 'degree': 2, 'ruin': 1, 'hustler': 1, 'matches': 1, 'gleason': 2, 'yeats': 1, 'cormac': 2, 'education': 1, 'flower': 6, 'shaw': 4, 'pygmalion': 3, 'integration': 1, 'defeating': 2, 'valueless': 1, 'speck': 2, 'participates': 1, 'austen': 1, 'nonexistent': 1, 'kaiser': 1, 'sosa': 1, 'buttons': 1, 'sown': 1, 'preform': 1, 'float': 2, 'authorities': 1, 'pseudo': 4, 'bottom': 3, 'eighties': 2, 'sweeps': 1, 'combat': 2, 'sort': 4, 'weakling': 1, 'moral': 3, 'undergoes': 2, 'possess': 2, 'strength': 1, 'recorded': 1, 'visionary': 1, 'contends': 1, 'uncontrolled': 2, 'farming': 4, 'pare': 1, 'lorenz': 2, 'agriculture': 1, 'grimm': 2, 'wooden': 3, 'editor': 3, 'rosalind': 2, 'omler': 1, 'barber': 1, 'carraclough': 1, 'reluctantly': 2, 'wyman': 1, 'resolved': 1, 'humphey': 1, 'prospectors': 2, 'digging': 1, 'lauren': 1, 'tropical': 4, 'lamppost': 1, 'artistic': 2, 'paranoid': 4, 'wanna': 1, 'graziano': 2, 'mcmurray': 1, 'opus': 1, 'paparazzo': 2, 'scantily': 1, '007': 1, 'convent': 1, 'governess': 7, 'naval': 3, 'michelangelo': 2, 'pope': 2, 'julius': 2, 'plight': 2, 'bridgewater': 1, 'criminally': 2, 'price': 2, 'casting': 1, 'vastness': 1, 'telly': 1, 'savalas': 1, 'medium': 2, 'jaws': 1, 'gilliman': 1, 'unstable': 3, 'dramatization': 4, 'pacific': 2, 'cimino': 1, 'roulette': 2, 'stormin': 1, 'mawwage': 1, 'eliot': 3, 'karen': 2, 'fondness': 1, 'nosed': 3, 'griswold': 4, 'flashbacks': 2, 'winnebago': 1, 'jareth': 1, 'connelly': 5, 'wright': 2, 'patinkin': 2, 'corps': 1, 'drill': 1, 'instructor': 1, 'katsuhiro': 1, 'otomo': 1, 'hoskins': 2, 'intermingled': 1, 'solitary': 1, 'outpost': 3, 'bassett': 2, 'mick': 1, 'nichols': 6, 'cabaret': 1, 'homespun': 1, 'baldwin': 1, 'hogan': 2, 
'elijah': 6, 'holm': 1, 'study': 3, 'mansion': 4, 'camping': 1, 'terrifying': 1, 'continued': 1, 'wuxia': 3, 'prodigy': 1, 'bush': 1, 'administration': 1, 'emperors': 1, 'seymor': 2, 'recounting': 1, 'washed': 5, 'braddock': 4, 'amongst': 2, 'polley': 1, 'engineers': 3, 'witherspoon': 11, 'malfeasance': 1, 'ho': 1, 'balthazar': 1, 'inherit': 2, 'alamos': 1, 'lise': 1, 'friedman': 2, 'ceil': 1, 'physical': 8, 'terapist': 1, 'tanner': 1, 'respectively': 4, 'mirren': 2, 'ops': 5, 'realization': 3, 'bobby': 1, 'floor': 5, 'payments': 2, 'secrecy': 2, 'heigl': 2, 'antin': 1, 'carrying': 2, 'curator': 1, 'anniston': 6, 'switches': 1, 'samples': 1, 'scifi': 1, 'greg': 6, 'strause': 2, 'abduction': 2, 'poems': 1, 'diving': 1, 'zooey': 1, 'deschanel': 1, 'mortimer': 1, 'chu': 1, 'international': 3, 'premier': 1, 'occurs': 1, 'suspects': 3, 'brewer': 1, 'largely': 1, 'lurie': 1, 'alexander': 3, 'skarsg': 1, 'ginnifer': 6, 'goodwin': 6, 'hudson': 5, 'cutcher': 1, 'reprising': 2, 'blaze': 1, 'morphs': 1, 'incarnations': 2, 'bethany': 2, 'keeping': 2, 'irvine': 2, 'thewlis': 3, 'extraordinary': 2, 'grants': 2, 'desires': 1, 'messenger': 2, 'survives': 2, 'walters': 2, 'brenda': 1, 'dea': 2, 'iron': 6, 'thor': 3, 'indecisive': 1, 'pot': 3, 'kingpin': 3, 'unleashed': 1, 'collegiate': 2, 'freshman': 1, 'grandparents': 1, 'granchildren': 1, 'ban': 2, 'mothers': 2, 'inner': 3, 'underachieving': 1, 'elisabeth': 5, 'shue': 5, 'dollar': 1, 'kitsch': 2, 'zemekis': 1, 'commercial': 2, 'transformed': 7, 'antebellum': 1, 'charter': 2, 'broadcast': 2, 'representative': 1, 'li': 6, 'dolph': 1, 'lundgren': 1, 'randy': 1, 'cotor': 1, 'crews': 2, 'horatio': 1, 'mistake': 2, 'hitchcok': 1, 'contained': 3, 'stabbed': 1, 'hiromasa': 1, 'yonebayashi': 1, 'borrowers': 1, 'polluted': 1, 'bookworm': 1, 'slinky': 1, 'potato': 1, 'gained': 3, 'billing': 1, 'portion': 1, 'repunzal': 1, 'adrian': 3, 'hold': 5, 'celebrities': 1, 'pug': 1, 'allegations': 1, 'capsized': 1, 'dazzling': 2, 'biological': 1, 
'jersey': 1, 'category': 1, 'delightful': 1, 'enjoy': 1, 'lever': 1, 'bb': 3, 'angry': 3, 'hawks': 4, 'paired': 3, 'conneley': 1, 'bowe': 1, 'artifact': 3, 'retake': 1, 'litter': 2, 'hough': 2, 'rufus': 2, 'iowa': 4, 'cornfield': 1, 'cushing': 1, 'helsing': 1, 'bloodsucker': 1, 'lam': 1, 'lone': 4, 'safety': 3, 'feig': 1, 'alumni': 1, 'nbc': 1, 'caesar': 2, 'jared': 1, 'hess': 1, 'eponymous': 2, 'meatloaf': 2, 'fishnet': 1, 'stockings': 1, 'culturally': 1, 'significant': 1, 'alert': 1, 'current': 4, 'cronenberg': 1, 'goldbloom': 1, 'teleportation': 1, 'mamet': 1, 'professionally': 1, 'hideous': 2, 'absence': 1, 'timon': 1, 'pumba': 1, 'offspring': 1, 'difficulties': 3, 'vignettes': 2, 'groundbreaking': 2, 'gylenhal': 2, 'doo': 1, 'wop': 1, 'newton': 2, 'preppy': 2, 'rogue': 5, 'replicants': 3, 'roth': 1, 'charecterized': 1, 'reliving': 1, 'youths': 1, 'catching': 2, 'wait': 2, 'roundtree': 1, 'isaac': 1, 'hayes': 2, 'rubber': 1, 've': 6, 'began': 2, 'climax': 1, 'mutant': 2, 'beginnings': 2, 'solver': 1, 'skit': 1, 'chastain': 1, 'clueless': 1, 'octavia': 1, 'guiding': 1, 'strathairn': 1, 'specially': 1, 'antiquated': 1, 'sheik': 1, 'infatuated': 1, 'englishwoman': 1, 'abducts': 1, 'saharan': 1, 'scimitars': 1, 'acrobatics': 1, 'vulgar': 1, 'tragedies': 2, 'passangers': 1, 'redgrave': 1, 'glenda': 1, 'harrelson': 1, 'mcdonagh': 2, 'bruges': 2, 'rallies': 1, 'oppressive': 1, 'ratched': 1, 'deranged': 3, 'anchor': 4, 'ravings': 1, 'revelations': 1, 'media': 3, 'profit': 1, 'generate': 1, 'scaring': 1, 'sorkin': 1, 'legal': 3, 'recourse': 1, 'lavish': 1, 'gasoline': 1, 'grammy': 1, 'hefley': 2, 'concentrates': 1, 'lesson': 2, 'walken': 2, 'torro': 1, 'snakes': 1, 'itzhak': 1, 'stern': 2, 'national': 1, 'crop': 2, 'dusting': 1, 'toungue': 1, 'ebeneezer': 1, 'gunner': 2, 'sociopath': 1, 'gein': 1, 'pursues': 1, 'checked': 1, 'reid': 2, 'angelos': 2, 'benecio': 2, 'modernization': 1, 'carol': 6, 'donner': 3, 'cinematography': 2, 'winstead': 2, 'lowe': 1, 'lacey': 1, 
'chabert': 1, 'trachtenberg': 1, 'cloke': 1, 'andrea': 1, 'bomback': 1, '1921': 1, 'lasky': 1, 'melford': 1, 'valentino': 1, 'agnes': 1, 'ayres': 1, 'adolphe': 1, 'menjou': 1, '1932': 5, 'sideshow': 1, 'performers': 2, 'composed': 1, 'rko': 1, 'tailed': 1, 'cotten': 1, 'alida': 1, 'valli': 1, 'trevor': 1, 'particularly': 1, 'atmospheric': 1, 'entre': 1, 'les': 1, 'morts': 1, 'boileau': 3, 'narcejac': 2, 'jerome': 1, 'lerner': 1, 'loewe': 1, 'yakuza': 2, 'seijun': 1, 'suzuki': 1, 'tetsuya': 1, 'watari': 1, 'reformed': 2, 'tetsu': 1, 'roam': 1, 'execution': 1, '1913': 1, 'lardner': 1, 'hooker': 1, 'mash': 1, 'doctors': 3, 'boorman': 2, 'wolfe': 1, 'speed': 2, 'aeronautical': 1, 'edwards': 2, 'mercury': 1, 'manned': 1, 'spaceflight': 1, 'burlinson': 1, 'williamson': 1, 'ballard': 1, 'autobiographical': 2, 'puyi': 1, 'peploe': 1, 'zwick': 1, 'volume': 1, 'laura': 2, 'hillenbrand': 1, 'commentator': 1, 'cressida': 1, 'cowell': 1, 'mechner': 1, 'boaz': 2, 'yakin': 2, 'miro': 1, 'carlo': 1, 'newell': 1, 'bruckheimer': 1, 'edited': 2, 'stamm': 1, 'thekla': 1, 'reuten': 1, 'violante': 1, 'placido': 1, 'irina': 1, 'bj': 1, 'rklund': 1, 'paolo': 1, 'bonacelli': 1, 'jo': 2, 'jardim': 2, 'harley': 1, 'cooperation': 1, 'scavengers': 1, 'recyclables': 1, 'gramacho': 1, 'landfills': 1, 'serving': 4, 'metropolis': 1, 'rio': 5, 'janeiro': 2, 'rockwell': 2, 'kenny': 1, 'florian': 1, 'henckel': 1, 'donnersmarck': 1, 'yuh': 1, 'nelson': 4, 'whitesell': 1, 'fogel': 1, 'rhymer': 1, 'dystopia': 1, 'bettany': 4, 'wilde': 2, 'rosenberg': 1, 'prosthetic': 1, 'fogelman': 1, 'picasso': 1, 'wenk': 2, 'kaufman': 2, 'agosto': 1, 'payday': 1, 'bibi': 1, 'andersson': 1, 'liv': 1, 'ullmann': 1, 'gig': 1, 'excorcist': 1, 'locking': 1, 'hoards': 1, 'nathanson': 1, 'potentially': 1, 'filthy': 1, 'thirteen': 1, 'placed': 3, 'avi': 1, 'gibsons': 1, 'raging': 3, 'bull': 2, 'schrader': 1, 'mardik': 1, 'memoir': 2, '2029': 1, 'biehn': 2, 'palma': 3, 'leung': 2, 'ephron': 2, 'malick': 1, 'exceptional': 1, 
'corbett': 1, 'miller': 6, 'clay': 2, 'bana': 2, 'toni': 2, 'collette': 2, 'nickelodeon': 1, 'disabled': 3, 'bennett': 1, 'beane': 3, 'derailing': 1, 'subsequent': 1, 'trank': 1, 'telekinetic': 1, 'hires': 6, 'homeless': 3, 'breasts': 1, 'hilarity': 2, 'nell': 1, 'materialistic': 1, 'boob': 1, 'baking': 1, 'warming': 2, 'arch': 3, 'ewan': 8, 'macgregor': 3, 'quinlan': 1, 'civilization': 1, 'stays': 4, 'willie': 3, 'hopeful': 1, 'scandals': 1, 'cultivates': 1, 'contracts': 1, 'scenario': 1, 'brains': 1, 'possibly': 4, 'magneto': 4, 'druglords': 1, 'cyborgs': 1, 'notice': 1, 'mma': 4, 'sturges': 1, 'residents': 1, 'fact': 2, 'cheung': 1, 'expressionist': 1, 'brigitte': 1, 'helm': 1, 'fancy': 2, 'donation': 1, 'museum': 3, 'leopard': 1, 'onset': 2, 'segments': 2, 'confronted': 1, 'hobo': 3, 'commander': 1, 'tempted': 2, 'railway': 2, 'mime': 1, 'thwarted': 1, 'entertainer': 3, 'declares': 2, 'occupied': 3, 'territory': 2, 'placement': 1, 'uprising': 1, 'bligh': 1, 'sailing': 1, 'tahiti': 1, 'benjamin': 3, 'mia': 1, 'farrow': 2, 'troupe': 3, 'pryce': 2, 'bureaucrat': 2, 'bureaucracy': 1, '96': 1, 'malls': 1, 'confront': 1, 'satirizes': 2, 'bands': 1, 'swimming': 5, 'cocoons': 1, 'youthful': 1, 'chapionship': 1, 'resurrected': 1, 'patric': 1, 'kiefer': 1, 'sutherland': 2, 'battles': 8, 'showcasing': 1, 'randall': 3, 'dale': 2, 'sentenced': 1, 'willian': 1, 'munny': 1, 'patricia': 2, 'arquette': 1, 'slackers': 1, 'linear': 1, 'stores': 1, 'randal': 1, 'liberate': 1, 'pretend': 1, 'recounts': 1, 'landings': 2, 'morre': 1, 'polansky': 1, 'smuggler': 3, 'luc': 4, 'besson': 4, 'mercenary': 1, 'zellwegger': 1, 'gould': 1, 'benches': 1, 'mvp': 1, 'grades': 1, 'infernal': 1, 'ivan': 3, 'emilie': 2, 'ravin': 3, 'markle': 1, 'freight': 1, 'loretta': 2, 'devine': 1, 'goode': 1, 'ireland': 2, 'craigh': 1, 'kosinski': 1, 'redform': 1, 'surratt': 1, 'robyn': 1, 'doherty': 1, 'hiccup': 1, 'desertto': 1, 'conceived': 1, 'abe': 1, 'abstract': 1, 'leonoardo': 1, 'undefeated': 2, 
'warrant': 1, 'millar': 1, 'tracey': 4, 'edmonds': 1, 'misfortunes': 2, 'serve': 1, 'laz': 1, 'alonso': 1, 'folk': 1, 'chandler': 2, 'exploration': 1, 'janiero': 1, '51': 1, 'connick': 3, 'ashley': 3, 'sturgess': 3, 'peeing': 2, 'greta': 2, 'gerwig': 1, 'navy': 9, 'briefcase': 2, 'replacing': 1, 'treks': 1, 'sorta': 1, 'magee': 1, 'suraj': 1, 'sharma': 1, 'afterwards': 1, 'jonny': 3, 'rhianna': 1, 'fleet': 1, 'ships': 3, 'squadron': 1, 'timur': 1, 'bekmambetov': 1, 'hutcherson': 2, 'lycans': 1, 'mcg': 2, 'vying': 1, 'judges': 1, 'mather': 1, 'leger': 1, 'radcliff': 2, 'lookout': 1, 'slayed': 1, 'voldemort': 2, 'ordeals': 1, 'submariners': 1, 'morgana': 1, 'le': 1, 'darkest': 1, 'sorceress': 2, 'popeye': 1, 'rough': 2, 'irresponsible': 1, 'sail': 2, 'wardrobe': 4, 'tippi': 1, 'hedren': 1, 'woodsboro': 1, 'birthday': 4, 'practice': 1, 'kungfu': 1, 'struck': 3, 'houses': 2, 'borrow': 1, 'irons': 1, 'repunzel': 1, 'cost': 1, 'siblings': 3, 'bears': 2, 'selena': 3, 'curls': 1, 'jessie': 2, 'statistical': 1, 'analysis': 2, 'larger': 2, 'luhrman': 1, 'misadventures': 5, 'spec': 1, 'acne': 1, 'tautou': 1, 'shut': 2, 'notoriously': 1, 'likes': 8, 'safari': 1, 'midgets': 1, 'bookish': 2, 'eventual': 1, 'light': 4, 'sabers': 1, 'animalistic': 1, 'bah': 3, 'humbug': 3, 'grouchy': 2, 'wakes': 7, 'shakepeare': 1, 'intricate': 1, 'grind': 1, 'stops': 1, 'promoted': 1, 'efficiency': 1, 'experts': 1, 'interrupts': 2, 'impersonates': 1, 'dutch': 2, 'mo': 1, 'channel': 1, 'cycle': 3, 'ears': 3, 'fawn': 1, 'vil': 1, 'jonathon': 1, 'warthog': 1, 'stepfamily': 1, 'royalty': 2, 'voodoo': 2, 'shaman': 1, 'gains': 1, 'appreciation': 1, 'furniture': 1, 'dishes': 1, 'heralded': 1, 'ascension': 2, 'kingship': 1, 'silverman': 1, 'surround': 1, 'minimal': 1, 'spreads': 1, 'epidemically': 1, 'laso': 1, 'monty': 2, 'jouney': 1, 'hairy': 1, 'feet': 1, 'rakish': 1, 'rhett': 1, 'picked': 1, 'schoolmates': 1, 'verge': 1, 'tools': 1, 'austria': 1, 'swimmers': 2, 'poland': 3, 'drives': 1, 'batmobile': 
1, 'arrive': 1, 'goth': 1, 'sake': 1, 'pressure': 2, 'rife': 1, 'opts': 1, 'erased': 2, 'wo': 3, 'locales': 1, 'invade': 2, 'colored': 1, 'slaves': 3, 'homes': 3, 'luther': 1, 'judi': 1, 'undertone': 1, 'boulders': 1, 'inmate': 4, 'clues': 2, 'disappearances': 1, 'flash': 1, 'mobs': 1, 'handheld': 1, 'surveillance': 3, 'passage': 1, 'virtual': 5, 'complains': 1, 'asian': 3, 'stoners': 1, 'holloween': 1, 'incredibly': 1, 'enormous': 2, 'preys': 2, 'inspirational': 6, 'spirituality': 1, 'connects': 1, 'directorhood': 1, 'insured': 1, 'chronic': 1, 'tub': 1, 'kasdan': 3, 'seduced': 1, 'tricked': 2, 'object': 1, 'rejuvenate': 1, 'rivals': 4, 'heros': 1, 'racist': 1, 'townfolk': 1, 'financials': 1, 'clue': 5, 'disappeared': 2, 'chocolate': 1, 'painting': 1, 'latin': 1, 'suspension': 2, 'babysits': 3, 'revolt': 4, 'executed': 1, 'creacher': 1, 'pushed': 2, 'limits': 2, 'gamble': 1, 'underneath': 1, 'freelance': 3, 'kicking': 1, 'sparkled': 1, 'clear': 6, 'enamoured': 1, 'hep': 1, 'defenseless': 1, 'fumble': 1, 'petty': 1, 'hopes': 3, 'safely': 1, 'marshal': 5, 'rope': 1, 'playground': 1, 'creek': 1, 'ideal': 2, 'biography': 2, 'abused': 2, 'beagle': 1, 'amateur': 1, 'file': 1, 'evolved': 2, 'chimps': 1, 'dominate': 1, 'sniper': 1, 'fields': 2, 'beth': 1, 'britt': 3, 'teaming': 2, 'cow': 1, 'boobs': 1, 'mitch': 2, 'gulag': 2, 'escapees': 2, '4000': 1, 'overland': 1, 'dressing': 2, 'twisp': 1, 'sight': 2, 'sheeni': 1, 'fouth': 1, 'braving': 1, 'quint': 2, 'lago': 1, 'othello': 1, 'manipulate': 2, 'explorer': 3, 'mushrooms': 1, 'enter': 2, 'tied': 3, 'highschool': 1, 'sweethearts': 2, 'ma': 1, 'greedy': 4, 'monologue': 2, 'makeup': 2, 'mcfarland': 1, 'harms': 1, 'suspected': 2, 'holds': 4, 'meetings': 1, 'buffalo': 1, 'ewoks': 2, 'chests': 1, 'demigod': 1, 'persius': 1, '1074': 1, 'jury': 1, 'executioner': 1, 'authority': 1, 'hauling': 1, 'cargo': 1, 'represents': 2, 'conversation': 3, 'sandwiches': 1, 'breathing': 1, 'forming': 4, 'attachment': 1, 'quintuplets': 1, 
'solid': 1, 'constructs': 1, 'skerritt': 1, 'incubates': 1, 'meal': 2, 'stupedest': 1, 'weirdest': 1, 'idiots': 1, 'crippled': 1, 'workforce': 1, 'routines': 1, 'explains': 1, 'lackluster': 1, 'diary': 1, 'controlled': 1, 'chavez': 1, 'nations': 2, 'attic': 1, 'incompetent': 1, 'recovers': 1, 'repair': 1, 'chimp': 1, 'troubles': 2, 'georgia': 2, 'solved': 1, 'conspiracy': 4, 'endanger': 1, 'micky': 2, 'progresses': 1, 'trusted': 1, 'shifter': 1, 'tables': 2, 'roughnecks': 1, 'imperials': 1, 'occupation': 3, 'abs': 1, 'sara': 4, 'cent': 1, 'carel': 1, 'advocating': 1, 'hats': 1, 'requirements': 1, 'themsleves': 1, 'shady': 1, 'overenthusiastic': 1, 'racoon': 1, 'atlantic': 1, '1919': 1, 'amnesia': 2, 'obsesses': 1, 'minka': 1, 'reserved': 1, 'seat': 2, 'villian': 2, 'sailors': 2, 'presentable': 1, 'minions': 4, 'adopting': 2, 'bugs': 1, 'underwood': 1, 'cheetahs': 1, 'middler': 1, 'maze': 3, 'else': 3, 'limb': 1, 'sideways': 1, 'puft': 1, 'cabinet': 1, 'hooked': 1, 'inswct': 1, 'jimmie': 1, 'colossal': 1, 'shine': 1, 'cody': 1, 'jarrett': 1, 'saks': 2, 'slater': 1, '1001': 1, 'nights': 2, 'virtuous': 1, 'lords': 3, 'frame': 1, 'whoiver': 1, 'carry': 4, 'turbulent': 1, 'montage': 1, 'baxter': 1, 'dalmations': 1, 'meat': 2, 'bicycle': 2, 'pheiffer': 1, 'critical': 3, 'darling': 1, 'cultural': 1, 'viewed': 1, 'yell': 1, 'bitch': 1, 'thumper': 2, 'restoring': 1, 'cumberbatch': 1, 'hiddleston': 2, 'hauted': 1, 'pleasure': 1, 'holding': 3, 'orgasm': 1, 'scharwtzman': 1, 'gideon': 2, 'directions': 1, 'intro': 1, 'mercilessly': 1, 'cassette': 1, 'raider': 1, 'hulk': 2, 'colony': 2, 'anthropologist': 1, 'gorillas': 2, 'spying': 1, 'sticks': 1, 'microscopic': 1, 'quebec': 1, 'places': 5, 'medical': 5, 'womanizing': 1, 'solves': 2, 'mysteries': 1, 'competes': 1, 'minute': 3, 'chariot': 1, 'delivering': 2, 'rendition': 2, 'roaming': 1, 'tremblay': 1, 'patterned': 1, 'kick': 1, 'sharpen': 1, 'invetrofertilization': 1, 'integrate': 1, 'gruen': 1, 'presumed': 1, 'primarily': 4, 
'oregon': 1, 'lied': 1, 'anakin': 2, 'tattooine': 2, 'galaxy': 4, 'waddums': 1, 'skynet': 1, 'resistence': 1, 'byrne': 3, 'advisor': 1, 'singers': 4, 'rockatansky': 1, 'bridget': 1, 'cedric': 1, 'damme': 2, 'norris': 3, 'icons': 1, 'swing': 1, 'gecko': 1, 'dock': 1, 'unions': 1, 'pits': 3, 'odin': 2, 'englishman': 1, 'nph': 1, 'approval': 1, 'vary': 2, 'behave': 1, 'wendelin': 1, 'draanen': 1, 'magento': 2, 'conservative': 4, 'ally': 3, 'bruiser': 1, 'vadar': 2, 'insight': 1, 'lakeside': 1, 'retreat': 3, 'mater': 3, 'grandpa': 1, 'gentile': 1, 'strives': 1, 'feeds': 1, 'reece': 1, 'motion': 9, 'embraces': 1, 'arrogant': 2, 'onscreen': 1, 'shelter': 1, 'regulate': 1, 'lon': 1, 'chaney': 1, 'bubba': 1, 'primary': 1, 'protectors': 1, 'punishment': 1, 'overgrown': 1, 'wreaks': 3, 'carney': 1, 'airborne': 2, 'graves': 1, 'reitman': 2, 'aykroyd': 3, 'harold': 5, 'ramis': 4, 'inbred': 1, 'cannibals': 1, 'pursuit': 3, 'salim': 1, 'akil': 1, 'nineteen': 2, 'stripping': 2, 'guided': 1, 'farmers': 2, 'diseased': 1, 'grips': 1, 'embarking': 1, 'wake': 4, 'spectre': 1, 'inheriting': 1, 'bandidos': 1, 'stinkin': 1, 'ricki': 1, 'clevon': 1, 'egg': 2, 'failure': 3, 'precrime': 1, 'lenny': 1, 'mystics': 1, 'goers': 1, 'scariest': 1, 'oppose': 1, 'kowalski': 1, 'thru': 1, 'compromise': 1, 'lying': 1, 'skulpt': 1, 'fir': 1, 'growers': 1, 'chon': 1, 'lineup': 1, 'exposes': 1, 'shifting': 2, 'toon': 1, 'innocence': 1, 'profile': 2, 'dont': 1, 'pixie': 1, 'tinkerbell': 1, 'cards': 1, 'sought': 1, 'global': 2, 'outbreak': 1, 'blend': 2, 'obedient': 1, 'defeats': 1, 'duels': 1, 'backwards': 1, 'stairs': 1, 'suicidal': 1, 'valuable': 1, 'unbalanced': 1, 'vivian': 1, 'unforeseen': 2, 'rebuilding': 1, 'reinventing': 1, 'enslaved': 1, 'captors': 2, 'formed': 1, 'solely': 1, 'gin': 2, 'swilling': 1, 'riverboat': 1, 'persuaded': 1, 'strait': 1, 'laced': 2, 'wisdom': 2, 'blooded': 1, 'liver': 1, 'fava': 1, 'beans': 1, 'nice': 2, 'chianti': 1, 'widower': 1, 'disguising': 1, 'confide': 1, 
'traditional': 1, 'spiteful': 1, 'rigged': 1, 'brandon': 2, 'routh': 2, 'proposed': 1, 'handbook': 1, 'mystical': 2, 'shepherdess': 1, 'fakes': 1, 'oakland': 3, 'athletics': 1, 'stan': 2, 'mogul': 1, 'randolph': 1, 'hearst': 1, 'suiters': 1, 'arnett': 1, 'shifu': 1, 'marial': 1, 'matthau': 5, 'hiatus': 1, 'wrecked': 1, 'lex': 2, 'luthor': 2, 'hovering': 1, 'skateboard': 1, 'taye': 1, 'diggs': 1, 'woes': 1, 'roderick': 1, 'whiterspoon': 1, 'wit': 1, 'catcher': 3, 'illness': 1, 'angelica': 2, 'bening': 1, 'artists': 4, 'marv': 1, 'relase': 1, 'nursery': 1, 'appearing': 1, 'directd': 1, 'upton': 1, 'sinclair': 1, 'sound': 3, 'editing': 3, 'farragut': 2, 'propaganda': 2, 'brussels': 1, 'slavers': 1, 'banding': 1, 'halle': 3, 'berry': 3, 'progress': 1, 'error': 6, 'destiny': 2, 'appears': 3, 'impression': 3, 'woe': 1, 'feuding': 2, 'classes': 1, 'jacobi': 2, 'sox': 1, 'bribes': 1, 'supplement': 1, 'measly': 1, 'milk': 1, 'hoax': 1, 'usher': 1, 'stylish': 1, 'constance': 1, 'towers': 1, 'blondes': 1, 'branaugh': 1, 'planning': 3, 'christians': 1, 'partial': 1, 'quotes': 2, 'stacked': 1, 'shit': 2, 'slimy': 1, 'scumbag': 1, 'puke': 1, 'wheelchair': 3, 'kirk': 2, 'gladiators': 1, 'taxidermy': 1, 'integrated': 1, 'switch': 3, 'grenades': 1, 'helmets': 1, 'mckellen': 2, 'nordic': 1, 'shane': 2, 'lotion': 1, 'potion': 1, 'hermione': 1, 'emerged': 1, 'getaway': 2, 'bigelow': 2, 'targeted': 2, 'loathing': 2, 'bandages': 1, 'influenced': 1, 'cg': 1, 'cheif': 1, 'asa': 2, 'runner': 1, 'refused': 1, 'sabbath': 1, 'hewitt': 1, 'mates': 1, 'karate': 1, 'ozarks': 1, 'scenerio': 1, 'seizes': 1, 'opportunity': 3, 'pretending': 1, 'kato': 2, 'vandalize': 1, 'closely': 1, 'resemble': 1, 'eduardo': 2, 'dilution': 1, 'percentage': 1, 'article': 2, 'chicken': 2, 'cannibalism': 1, 'inanimate': 2, 'playmates': 1, 'arrested': 1, 'switzerland': 1, 'decisions': 1, 'finished': 1, 'swiss': 1, 'arrest': 1, 'remained': 1, 'productions': 1, 'cecil': 2, 'dramatized': 1, 'hebrew': 2, 'egyptian': 1, 
'deliverer': 1, 'natchitoches': 1, 'blends': 1, 'debute': 1, 'jenniffer': 1, 'oilman': 2, 'wealth': 1, 'centuries': 2, 'invitation': 1, 'managers': 2, 'humor': 1, 'afghanistan': 1, 'sebastian': 2, 'junger': 1, 'photojournalist': 2, 'hetherington': 1, 'pettyfer': 4, 'gwen': 1, 'stacy': 2, 'unicorn': 1, 'horn': 1, 'subject': 4, 'testing': 1, 'radiation': 1, 'simulation': 2, 'galifinakis': 2, 'ventures': 1, 'aws': 1, 'attacking': 2, 'beachgoers': 1, 'demonically': 1, 'ironically': 1, 'influence': 2, 'draws': 1, 'ligers': 1, 'thick': 2, 'fernando': 1, 'meirelles': 1, 'hid': 1, 'ventura': 1, 'commandos': 2, 'imagining': 1, 'midwest': 2, 'modified': 1, 'aragorn': 2, 'draw': 1, 'gaze': 1, 'approach': 2, 'images': 2, 'seagal': 1, 'cook': 1, 'battleship': 2, 'aided': 2, 'seize': 1, 'whom': 2, 'planned': 2, 'weaponized': 1, 'repossessed': 1, 'giallo': 1, 'gallery': 1, 'moviegoers': 1, 'hugely': 1, 'keir': 2, 'dullea': 2, 'overly': 2, 'middleweight': 1, 'paramedic': 2, 'edge': 2, 'severed': 2, 'consisted': 1, 'consists': 1, 'reference': 3, 'slavery': 2, 'alcatraz': 1, 'razor': 1, 'knives': 2, 'fingers': 2, 'nfl': 1, 'quarterback': 2, 'mechanic': 2, 'riot': 1, 'stunt': 1, 'retro': 2, 'unleashes': 2, 'tyranny': 1, 'epidemic': 1, 'secluded': 1, 'prix': 1, 'lt': 1, 'shed': 1, 'relative': 1, 'journeys': 1, 'wishing': 2, 'stallion': 3, 'participate': 2, 'babysitting': 1, 'unaware': 1, 'ahead': 1, 'gladiator': 1, 'trainer': 2, 'cavalry': 3, 'wonderful': 3, 'promise': 1, 'respect': 1, 'heffley': 2, 'paratrooper': 1, 'peppard': 1, 'truman': 2, 'capote': 2, 'feeling': 2, 'kevon': 1, 'macaws': 1, 'cockatoo': 1, 'smuggle': 1, 'beaten': 1, 'stiff': 1, 'lures': 1, 'rebellion': 1, 'rave': 1, 'theodore': 1, 'unoccupied': 2, 'hopkons': 1, 'primitive': 1, 'perfection': 2, 'vengeful': 4, 'retribution': 1, 'combining': 1, 'soundtracked': 1, 'steeler': 1, 'wheel': 3, 'rehearsing': 1, 'andr': 2, 'accent': 2, 'prevented': 1, 'understood': 1, 'remedy': 1, 'slapped': 1, 'concentrate': 1, 'earlier': 
1, 'afred': 1, 'moe': 2, 'insist': 1, 'possibility': 2, 'heather': 1, 'instructed': 1, 'sawyer': 1, 'yippee': 2, 'yay': 2, 'towering': 1, 'mentioned': 1, 'catchphrase': 2, 'whoever': 1, 'wont': 1, 'kinney': 1, 'illustrated': 1, 'sixth': 2, 'grade': 4, 'muhammad': 1, 'zaire': 1, 'affluent': 1, 'griswald': 1, 'nursing': 2, 'policemen': 1, 'denmark': 4, 'ant': 2, 'grasshoppers': 1, 'dealt': 1, 'injury': 1, 'marauder': 1, 'reassembles': 3, 'collar': 3, 'stubborn': 1, 'graders': 2, 'solider': 3, 'singles': 2, 'dumped': 1, 'sugar': 3, 'arguments': 1, 'pitch': 1, 'launches': 1, 'musicians': 2, 'insinuate': 1, 'whiting': 1, 'hussey': 1, 'allocation': 1, 'lifter': 1, 'initials': 1, 'apted': 1, 'edmund': 2, 'pevensie': 4, 'eustace': 1, 'caspian': 1, 'presidents': 1, 'haughty': 1, 'attitude': 3, 'adapting': 1, 'utilizes': 1, 'acquire': 2, 'athletic': 1, 'ira': 2, 'defector': 2, 'accuses': 1, 'galvanizing': 1, 'offensive': 1, 'laziness': 1, 'marvelous': 1, 'rips': 1, 'participation': 1, 'devoted': 1, 'druggie': 1, 'unsinkable': 1, 'slay': 1, 'organize': 2, 'militia': 1, 'flees': 2, 'exile': 2, 'invades': 1, 'lawless': 2, 'desperate': 3, 'gravely': 1, 'pea': 2, 'soup': 2, 'vomit': 2, 'worth': 1, 'tutelage': 1, 'handy': 1, 'surprising': 1, 'flow': 1, 'sensation': 1, 'humbling': 1, 'counterespionage': 1, 'acquainted': 1, 'knot': 2, 'unreliable': 1, 'photographs': 1, 'assignments': 1, 'faster': 1, 'speeding': 1, 'bullet': 1, 'needle': 2, 'arabic': 1, 'peninsula': 1, 'saudi': 1, 'bearded': 1, 'bam': 1, 'margera': 1, 'communists': 2, 'mammal': 1, 'hydrgens': 1, 'oxygen': 1, 'prepared': 1, 'rainforest': 1, 'documentarians': 1, 'detonating': 1, 'grief': 2, 'uncovering': 1, 'closure': 1, 'unresolved': 1, 'cases': 1, 'unreciprocated': 1, 'haunt': 1, 'songwriter': 1, 'donald': 3, 'spoon': 1, 'medicine': 2, 'flair': 1, 'copier': 1, 'emitted': 1, 'clouds': 1, 'signalling': 1, 'visitor': 1, 'arrives': 2, 'urgent': 2, 'barada': 1, 'nikto': 1, 'kent': 3, 'handle': 1, 'suited': 1, 'tai': 1, 
'janice': 1, 'fische': 1, 'jeremias': 1, 'evangelical': 2, 'requires': 2, 'swim': 1, 'mermaids': 1, 'subway': 1, 'platform': 1, 'wherever': 1, 'worldly': 1, 'insect': 2, 'burrows': 1, 'ripe': 1, 'fruit': 1, 'heathcliffe': 1, 'earnshaw': 1, 'chemotherapy': 1, 'moderate': 1, 'knack': 1, 'repairing': 1, 'cash': 6, 'thrust': 1, 'extends': 1, 'superpowers': 3, 'developing': 1, 'zany': 2, 'converse': 1, 'succeed': 1, 'pen': 3, 'loyal': 1, 'pose': 1, 'careless': 1, 'palahniuk': 1, 'mugging': 1, 'enables': 1, 'trainspotters': 1, 'sprinting': 1, 'whistle': 1, 'robs': 2, 'syrup': 2, 'guns': 2, 'eager': 1, 'verona': 1, 'induces': 1, 'endearing': 1, 'assorted': 1, 'chateau': 1, 'endangered': 1, 'preceding': 1, 'courteney': 2, 'troop': 1, 'landscape': 1, 'barracus': 1, 'murdoch': 1, 'uplifting': 2, 'chocolatier': 1, 'fezzik': 1, 'niccol': 1, 'elise': 1, 'deliberately': 1, 'crosses': 2, 'swamp': 1, 'brittany': 3, 'barden': 1, 'heavily': 1, 'mistreated': 1, 'scraping': 1, 'chigurh': 1, 'homemade': 1, 'rifle': 1, 'cultist': 1, 'motor': 2, 'strictly': 1, 'stockton': 1, 'greenwich': 1, 'bookstore': 1, 'hephburn': 1, 'ava': 2, 'gardner': 2, 'seduces': 1, 'lancaster': 4, 'wyler': 1, 'regards': 1, 'chapeaus': 1, 'cussler': 1, 'cleary': 1, 'deposed': 1, 'convincing': 1, 'enchanting': 1, 'cranston': 1, 'diplomatic': 1, 'zane': 1, 'caledon': 1, 'cal': 1, 'hockley': 1, 'geldof': 1, 'isolation': 1, 'cogan': 1, 'protected': 1, 'insomniac': 1, 'mlb': 1, 'peak': 1, 'steep': 1, 'offerred': 1, 'expand': 1, 'developer': 1, 'subdivision': 1, 'protesters': 1, 'disturbed': 1, 'assailants': 1, 'surprises': 2, 'corpse': 1, 'sewn': 1, 'stapled': 1, 'bolted': 1, 'settlers': 1, 'pushing': 1, 'dreyer': 1, '1928': 1, 'endeavors': 1, 'legion': 2, 'emblem': 1, 'mat': 1, 'meltdown': 1, 'spectacle': 1, 'bryner': 2, 'rameses': 1, 'yeoh': 2, 'ziyi': 2, 'zhang': 2, 'fantastic': 1, 'lock': 2, 'touch': 1, 'sum': 1, 'seas': 3, 'viciously': 1, 'sink': 3, 'difficulty': 1, 'manipulates': 1, 'grande': 1, 'desk': 1, 
'jockeys': 2, 'gillespie': 1, 'holland': 1, 'elfman': 2, 'zanuck': 1, 'severely': 1, 'sacred': 1, 'text': 2, 'shorter': 1, 'denzil': 1, 'whitta': 1, 'accomplished': 1, 'physicist': 1, 'avowed': 1, 'radium': 1, 'mixes': 3, 'cloony': 1, 'merly': 1, 'flamboyant': 3, 'guilt': 1, 'bread': 1, 'alleged': 1, 'maximum': 1, 'interviews': 2, 'brandt': 2, 'gielgud': 2, 'jolly': 1, 'punched': 1, 'occasionally': 1, '00': 2, 'imaginary': 1, 'distressed': 1, 'hollowness': 1, 'stole': 1, 'investing': 1, 'careful': 1, 'effie': 1, 'trinket': 1, 'attach': 1, 'kerry': 1, 'bradford': 1, 'estelle': 1, 'parsons': 1, 'barrow': 1, 'everybody': 1, 'freudian': 1, 'jugian': 1, 'latter': 1, 'lasseter': 2, 'dubbing': 1, 'dialog': 1, 'mouth': 1, 'movements': 1, 'ruled': 3, 'explosion': 1, 'epa': 1, 'shuts': 1, 'ois': 1, 'coping': 1, 'myriad': 1, 'crises': 1, 'friendships': 1, 'dex': 1, 'gabourey': 3, 'sidibe': 3, 'harlem': 1, 'enroll': 1, 'alternative': 1, 'drafted': 1, 'gawky': 1, 'nerds': 3, 'joined': 1, 'heartless': 1, 'tycoons': 1, 'longtime': 1, 'stoick': 1, 'vast': 1, 'aspires': 1, 'bannion': 2, 'politically': 4, 'syndicate': 3, 'kinnear': 1, 'finals': 1, 'pageant': 1, 'vw': 1, 'katy': 1, 'michell': 1, 'eliminates': 1, 'replicas': 1, 'henri': 1, 'georges': 2, 'clouzot': 1, 'mistress': 1, 'winkler': 1, 'majors': 1, 'researcher': 2, 'welded': 1, 'exploded': 1, 'promoting': 1, 'chicks': 2, 'aware': 1, 'explosions': 1, 'dobbs': 2, 'mine': 2, 'sierra': 1, 'madre': 1, 'mountains': 1, 'traven': 1, 'racism': 2, 'antartica': 1, 'medlock': 1, 'terrifyingly': 1, 'wed': 2, 'chihiro': 1, 'bathe': 1, 'bathhouse': 1, 'wrecking': 1, 'cranky': 1, 'populous': 1, 'georgian': 1, 'joads': 1, 'oklahomah': 1, 'disadvantaged': 1, 'plainview': 1, 'deserted': 1, 'sentient': 3, 'hamburger': 1, 'rihanna': 2, 'effron': 1, 'forgets': 2, 'kim': 4, 'krickitt': 1, 'thurnan': 1, 'packing': 1, 'handicap': 1, 'zoro': 1, 'trafficking': 1, 'morrocco': 1, 'fitzgerald': 2, 'linearly': 1, 'attended': 2, 'liste': 1, 'americas': 1, 
'loonies': 1, 'skivvies': 1, 'interweaves': 2, 'pals': 2, 'milo': 1, 'til': 2, 'forays': 1, 'bree': 1, 'daniels': 2, 'shanghai': 1, 'killings': 1, 'weaving': 1, 'attacus': 1, 'handyman': 1, 'suspended': 1, 'comedian': 3, 'whoopie': 1, 'shouting': 2, 'beer': 1, 'hippos': 1, 'scharzeneiger': 1, 'horrid': 1, 'enthusiasts': 2, 'spotting': 1, 'thinkin': 1, 'bishop': 1, 'cathedral': 1, 'prays': 1, 'carson': 2, 'traumatic': 1, 'merciless': 1, 'nimr': 1, 'antal': 1, 'nien': 1, 'jen': 1, 'elaine': 1, 'jin': 1, 'issei': 1, 'ogata': 1, 'polar': 1, 'caps': 1, 'begun': 2, 'melt': 1, 'conflicting': 1, 'opponents': 1, 'bursting': 1, 'hugging': 1, 'kristi': 2, 'resides': 1, 'cratchit': 1, 'gonzo': 1, 'moss': 1, 'extravaganza': 1, 'rebooted': 2, 'darker': 1, 'grittier': 1, 'tyson': 1, 'decrepit': 1, 'nitroglycerine': 1, 'shipment': 1, 'equipment': 1, 'alaska': 3, 'greenpeace': 1, 'volunteer': 1, 'rapidly': 1, 'fascist': 1, 'stepdaughter': 1, 'eerie': 4, 'captivating': 1, 'ferrara': 1, 'finzi': 1, 'contini': 1, 'aristocratic': 1, 'urbane': 1, 'abducting': 1, 'transforming': 2, 'maidens': 1, 'nearby': 1, 'trapp': 2, 'unfortunately': 1, 'annual': 4, 'mortal': 3, 'likely': 1, 'expulsion': 1, 'ghostly': 2, 'terminator': 1, 'conceive': 1, 'impress': 1, 'guardian': 1, 'climactic': 1, 'awoken': 1, 'strugges': 1, 'morals': 1, 'seniors': 2, 'throw': 2, 'midnight': 1, 'vulnerable': 1, 'dinero': 2, 'defended': 1, 'pranksters': 1, 'shock': 1, 'barrel': 1, 'sandy': 2, 'claws': 1, 'heal': 1, 'disarm': 1, 'describe': 1, 'reckoning': 1, 'waxing': 1, 'cratchet': 1, 'mcallister': 1, 'bashful': 1, 'sleepy': 1, 'sneezy': 1, 'worry': 1, 'suitors': 1, 'capable': 1, 'unlocking': 1, 'hansen': 1, 'skin': 1, 'rip': 1, 'replace': 1, 'dawes': 1, 'wyatt': 2, 'earp': 2, 'holliday': 1, 'southwestern': 1, 'recieve': 1, 'wrapper': 1, 'demise': 1, 'covert': 2, 'expedition': 1, 'dicky': 1, 'eklund': 1, 'yourself': 1, 'federal': 3, 'rampant': 1, 'haller': 1, 'maccormack': 1, 'motto': 1, 'brightest': 1, 'blackest': 1, 
'shall': 1, 'dent': 1, 'chopsticks': 1, 'accomplish': 1, 'blurb': 1, 'increase': 1, 'aviator': 2, 'burst': 1, 'victoria': 1, 'lightening': 1, 'ra': 1, 'ghul': 1, 'irene': 2, 'dunne': 2, 'divorcing': 1, 'undermine': 1, 'isao': 1, 'takahata': 1, 'akiyuki': 1, 'nosaka': 1, 'mookie': 1, 'compared': 1, 'cuddly': 1, 'tabloid': 1, 'liquor': 1, 'soaked': 1, 'barbossa': 1, 'elusive': 1, 'levant': 2, 'sal': 1, 'mineo': 1, 'intentions': 1, 'explain': 1, 'finance': 2, 'design': 1, 'deputy': 1, 'marlene': 1, 'dietrich': 1, 'saloon': 2, 'gal': 1, 'frenchie': 1, 'nomiated': 1, 'momoa': 3, 'determination': 1, 'sexually': 2, 'harrasses': 1, 'batemen': 1, 'disgruntled': 3, 'inherits': 1, 'gelfling': 1, 'goblins': 1, 'mcteigue': 1, 'neville': 1, 'idle': 1, 'fanciful': 1, 'ambassador': 2, 'divine': 2, 'boozer': 1, 'willed': 1, 'copies': 1, 'overtones': 1, 'coaxed': 1, 'walked': 3, 'enthralled': 1, 'leagues': 1, 'basinger': 1, 'lynn': 1, 'bracken': 1, 'interracial': 1, 'bare': 1, 'huntsman': 2, 'leterrier': 1, 'edmond': 1, 'foolish': 1, 'bend': 1, 'arrow': 1, 'fuzzy': 1, 'lollipop': 1, 'mario': 3, 'andreacchio': 1, 'puppy': 1, 'puzo': 2, 'gripping': 1, 'genuine': 1, 'bogus': 1, 'kellogg': 1, 'macfadyen': 1, 'stevenson': 2, 'swashbuckling': 1, 'marisa': 1, 'tomei': 1, 'seller': 1, 'sleazy': 1, 'foolproof': 2, 'metallurgy': 2, 'plutonium': 1, 'processing': 1, 'purposefully': 1, 'psychologically': 1, 'exposing': 1, 'blatant': 1, 'violations': 1, 'rebellious': 2, 'accordance': 1, 'aquinas': 1, 'playback': 1, 'fed': 2, 'mundane': 2, 'bobs': 1, 'celebrity': 2, 'haircut': 1, 'stems': 1, 'lawsuits': 1, 'plump': 1, 'whi': 1, 'ch': 1, 'aimlessly': 1, 'colonial': 1, 'extraterrestrials': 2, 'fading': 1, 'mysteriously': 1, 'viewers': 2, 'belt': 1, 'wlaberg': 1, 'vocal': 1, 'hemisphere': 1, 'profession': 2, 'brook': 2, 'granddaughter': 1, 'noah': 1, 'baumbach': 1, 'uninterested': 1, 'bolshevik': 1, 'instrumental': 1, 'titles': 2, 'russ': 1, 'tamblyn': 1, 'acrobat': 1, 'richards': 1, 'rarely': 1, 
'falstaff': 1, 'roistering': 1, 'blending': 1, 'stroll': 1, 'pat': 4, 'morita': 2, 'mentors': 1, 'jaden': 1, 'disobeys': 1, 'giamatti': 4, 'haden': 2, 'disappointment': 1, 'anchorman': 1, 'chenery': 1, 'tweedy': 1, 'colleagues': 1, 'unbeaten': 1, 'heavens': 1, 'hedges': 2, 'ahmet': 1, 'zappa': 1, 'bargained': 1, 'manhunt': 1, 'lowly': 1, 'gardener': 1, 'utterances': 1, 'phoebe': 1, 'cates': 1, 'stoned': 1, 'spicoli': 1, 'therman': 1, 'hood': 1, 'hoods': 1, 'examine': 1, 'sudden': 1, 'hansel': 1, 'gretel': 1, 'operatives': 2, 'wage': 2, 'meteorite': 1, 'bump': 1, 'bourgeois': 1, 'misogynistic': 1, 'snobbish': 2, 'phonetics': 1, 'stud': 1, 'judith': 1, 'rossner': 1, 'portland': 1, 'dector': 1, 'obnoxious': 1, 'flaherty': 1, 'doren': 1, 'shelton': 1, 'bankrupt': 1, 'freedonia': 1, 'sylvania': 1, 'teasdale': 1, 'penniless': 1, 'bud': 1, 'cort': 1, 'napping': 1, 'treacherous': 1, 'pedal': 1, 'reddy': 1, 'breadwinner': 1, 'allison': 1, 'pearson': 1, 'critters': 1, '1825': 2, '1884': 1, 'sudan': 1, 'mason': 2, 'resigns': 1, 'regiment': 1, 'rebels': 2, 'cheer': 1, 'keach': 2, 'lockwood': 1, 'visitors': 1, 'posse': 1, 'attempted': 1, 'persona': 1, 'steed': 1, 'ronald': 1, 'reagan': 1, 'nun': 2, 'prejean': 1, 'bellas': 1, 'campus': 2, 'deborah': 3, 'kerr': 3, 'rushing': 1, 'zinnemann': 3, 'focus': 3, 'amir': 1, 'lev': 1, 'spark': 1, 'rush': 1, 'berle': 1, 'deteriorate': 1, 'clinton': 1, '44': 2, 'magnums': 1, 'fiery': 1, 'archery': 1, 'slap': 1, 'achieve': 1, 'humiliating': 1, 'stuffy': 1, 'dimensions': 2, 'marty': 2, 'mcfly': 1, 'wrecks': 1, 'evolve': 1, 'missile': 1, 'sic': 1, 'shaken': 1, 'contribution': 1, 'illinois': 1, 'governor': 1, 'blagojevich': 1, 'referee': 1, 'mathilda': 1, 'treebeard': 1, 'anthropomorphic': 2, 'stark': 2, 'avant': 1, 'garde': 1, 'conflicted': 1, 'loyalties': 1, 'aint': 1, 'administers': 1, 'disturbing': 1, 'lethal': 1, 'tests': 1, 'unwilling': 1, 'subjects': 1, 'klondike': 1, '1925': 1, 'defied': 1, 'adversary': 1, 'amputated': 1, 'laboratory': 
1, 'mgm': 1, 'garbo': 1, 'dockworker': 1, 'orchestrated': 1, 'condensed': 1, '360': 1, 'igor': 1, 'disastrous': 1, 'vengeance': 2, 'empty': 1, 'bluth': 1, 'robery': 1, 'championed': 2, 'clsssic': 1, 'popularize': 1, 'maritime': 1, 'berg': 2, 'sank': 1, '1912': 1, 'backstabbed': 1, 'reversal': 1, 'transylvania': 2, 'attain': 1, 'sprague': 1, 'grayden': 1, 'molly': 1, 'ephraim': 1, 'sends': 2, 'cameos': 1, 'rajon': 1, 'rondon': 1, 'dwight': 1, 'dusty': 1, 'butterfield': 1, 'cinematographer': 1, 'weirdness': 1, 'unravel': 1, 'polynesians': 1, 'settled': 1, 'macfarlane': 4, 'mustached': 1, 'hooper': 1, 'renounce': 1, '942': 1, 'aaaron': 1, 'moretz': 1, 'shooter': 1, 'fighters': 1, 'narratives': 1, 'mayhem': 1, 'bile': 1, 'duct': 1, 'streak': 1, 'continuing': 1, 'sweep': 2, 'puttin': 1, 'ritz': 1, 'unscathed': 1, 'delarge': 1, 'droogs': 1, 'workings': 1, 'schrek': 1, 'jackosn': 1, 'competitive': 1, 'capella': 1, 'inflates': 1, 'absurd': 1, 'contest': 1, 'reservation': 1, 'timid': 2, 'villainous': 1, 'likeable': 1, 'gru': 1, 'sylvain': 1, 'pricks': 1, 'finger': 2, 'slumber': 1, 'sticking': 1, 'armored': 1, 'letting': 1, 'founds': 1, 'dissolves': 1, 'closest': 1, 'slain': 1, 'feminine': 1, 'delema': 1, 'honcho': 1, 'despises': 1, 'mashing': 1, 'westerners': 1, 'unworldly': 1, 'heady': 1, 'rian': 1, 'represent': 2, 'exact': 1, 'deplorable': 1, 'nightmares': 1, 'signature': 1, 'bladed': 1, 'carbonite': 1, 'humphry': 2, 'extensive': 1, 'surgery': 1, 'adele': 1, 'shattered': 1, 'telepathy': 1, 'assembles': 1, 'catwoman': 1, 'besieged': 1, 'crook': 1, 'reaves': 1, 'aziz': 2, 'ansari': 2, 'orcs': 1, 'commonly': 1, 'scientifically': 1, 'inaccurate': 1, 'recreations': 1, 'extinct': 1, 'clare': 1, 'alot': 1, 'bordering': 1, 'gravity': 1, 'hereo': 1, 'katara': 1, 'waterbender': 1, 'disobeying': 1, 'overprotective': 1, 'starters': 1, 'slum': 1, 'development': 1, 'districts': 1, 'fables': 1, 'dread': 1, 'rockumentary': 1, 'improve': 1, 'enslavement': 1, 'cobwebs': 1, 'ideals': 1, 
'obelisk': 1, 'accelerates': 1, 'evolution': 1, 'selick': 1, 'spread': 1, 'cirus': 1, 'romp': 1, 'outing': 1, 'below': 1, 'review': 1, 'rotten': 1, 'tomatoes': 1, 'arrived': 1, 'chenoweth': 1, 'cloris': 1, 'leachman': 1, 'meagan': 1, 'holder': 1, 'gamma': 1, 'phi': 1, 'beta': 1, 'sorority': 1, 'contacted': 1, 'playthings': 1, 'flanagan': 1, 'harasses': 1, 'musante': 1, 'acquaintances': 1, 'level': 1, 'minus': 1, 'baggage': 1, 'sully': 1, 'encountered': 1, 'patterson': 1, 'uptight': 1, 'relax': 1, 'werner': 1, 'herzog': 1, 'treadwell': 1, 'amie': 1, 'huguenard': 1, 'activists': 1, 'hallucinations': 1, 'pyramid': 1, 'bolton': 1, 'driving': 4, 'hour': 1, 'crowd': 2, 'stunning': 2, 'newer': 1, 'creepy': 2, 'dolls': 1, 'button': 1, 'overrated': 1, 'interplanetary': 1, 'jedis': 1, 'gostling': 1, 'policia': 1, 'warp': 1, 'backstabbing': 1, 'material': 1, 'sled': 1, 'zimba': 1, 'believing': 1, 'moor': 1, 'venice': 1, 'doubts': 1, 'virtue': 1, 'mod': 1, 'confuse': 1, 'goodfellas': 1, 'roadside': 1, 'heck': 1, 'moralistic': 1, 'cryptography': 1, 'bankers': 1, 'raiding': 1, 'coulda': 1, 'hunts': 1, 'plants': 1, 'laser': 1, 'countless': 1, 'deviant': 1, 'investigators': 1, 'curly': 2, 'matrix': 1, 'mixing': 1, 'anarchist': 1, 'trippy': 1, 'distractions': 1, 'prolonged': 1, 'attracts': 2, 'goslin': 1, 'sorts': 1, 'nasty': 2, 'ernst': 1, 'lubitsch': 1, 'jeanette': 1, 'macdonald': 1, 'horizon': 1, 'vigo': 1, 'chico': 1, 'harpo': 1, 'carlisle': 1, 'ruggles': 1, 'barry': 2, 'ace': 1, 'astor': 1, 'mankiewicz': 3, 'prospecting': 1, 'scoring': 1, 'total': 1, 'despatch': 1, 'schaefer': 1, 'pierson': 1, 'confronting': 2, 'emptiness': 1, 'coldness': 1, 'vidor': 1, 'tumultuous': 1, 'relations': 1, 'auclair': 1, 'scottie': 2, 'novak': 1, 'bel': 1, 'geddes': 1, 'franju': 1, 'controvery': 1, 'rossen': 1, 'piper': 1, 'laurie': 2, 'provine': 1, 'pollard': 1, 'vivacious': 1, 'harmonica': 1, 'hopper': 1, 'topol': 1, 'norma': 1, 'frey': 1, 'martino': 1, 'roscoe': 1, 'browns': 1, 'dern': 2, 
'colleen': 1, 'dewhurst': 1, 'schneider': 1, 'jacqueline': 1, 'bisset': 1, 'leaud': 1, 'valentina': 1, 'cortese': 1, 'frankenstein': 2, 'fim': 1, 'gardenia': 1, 'madeline': 2, 'kringle': 1, 'heater': 1, 'marin': 1, 'rene': 1, 'daalder': 1, 'resorts': 1, 'remick': 1, 'dignity': 1, 'fisher': 2, 'somewhere': 1, 'adventured': 1, 'mashed': 1, 'potatoes': 1, 'margot': 1, 'kidder': 1, 'raises': 1, 'mining': 1, 'fransisco': 1, 'whil': 1, 'togeather': 1, 'adoptive': 1, 'upbringing': 1, 'eikenberry': 1, 'helmond': 1, 'siamese': 1, 'terminate': 2, 'olmos': 1, 'contracted': 2, 'colorado': 1, 'alliance': 1, 'militarily': 1, 'wolverines': 1, 'oust': 1, 'occupiers': 1, 'barash': 1, 'levinson': 1, 'nowhere': 1, 'mentor': 3, 'cambodia': 1, 'annie': 2, 'potts': 1, 'unicorns': 1, 'becker': 1, 'depalma': 2, 'hunger': 2, 'fingered': 1, 'henchman': 1, 'inigo': 1, 'montoya': 1, 'bootcamp': 1, 'toontown': 1, 'debutante': 1, 'cynical': 1, 'enduring': 1, 'superstars': 1, 'lucchese': 1, 'conway': 1, 'jeannot': 1, 'szwarc': 1, 'hierarchy': 1, 'hairstylist': 1, 'hedge': 1, 'trimmer': 1, 'ninny': 1, 'threadgoode': 1, 'contestants': 1, 'paquin': 2, 'campion': 2, 'lusted': 1, 'neill': 2, 'wang': 1, 'histories': 1, 'keys': 1, 'jan': 1, 'bont': 1, 'morrow': 1, 'bradd': 1, 'manga': 1, 'masamune': 1, 'shirow': 1, 'zuker': 2, 'ormond': 1, 'deception': 1, 'showcased': 1, 'ricks': 1, 'nephew': 1, 'exclaiming': 1, 'technique': 1, 'sizemore': 1, 'loaf': 1, 'ambulance': 1, 'sanity': 1, 'taymor': 1, 'diedrich': 1, 'bader': 1, 'sear': 1, 'communicated': 1, 'shuffling': 1, 'cate': 2, 'blanchett': 2, 'mangold': 2, 'dominique': 1, 'bauby': 1, 'syndrome': 1, 'jonas': 1, 'shimit': 1, 'amin': 1, 'brunhilde': 1, '2019': 1, 'strode': 1, 'trailer': 1, 'grindhouse': 1, 'coulter': 1, 'tennant': 1, 'gio': 1, 'perez': 1, 'amber': 1, 'valletta': 1, 'crowley': 1, 'sequal': 1, 'carlsbad': 1, 'agreeing': 1, 'szostak': 1, 'vogel': 1, 'turteltaub': 1, 'molina': 1, 'teresa': 2, 'palmer': 2, 'jorma': 1, 'taccone': 1, 
'phillippe': 2, 'balfor': 2, 'faison': 1, 'johnston': 1, 'jovovich': 2, 'weir': 2, 'siberian': 1, 'totally': 1, 'eighth': 1, 'luketic': 1, 'selleck': 1, 'wilkinson': 1, 'schwentke': 1, 'caregivers': 1, 'mutual': 1, 'murdock': 1, 'ferrera': 1, 'carlos': 3, 'mencia': 1, 'lance': 1, 'regina': 1, 'dalton': 1, 'berkoff': 1, 'designer': 2, 'coined': 2, 'murderess': 1, 'faulty': 2, 'rosario': 1, 'dawson': 1, 'marriages': 2, 'holy': 1, 'behalf': 1, 'totalitarion': 1, 'organization': 2, 'crashed': 1, 'rhode': 1, 'fogler': 2, 'isla': 1, 'abigail': 1, 'breslin': 1, 'saldanha': 2, 'convention': 1, 'roulet': 1, 'dugan': 2, 'perlman': 1, 'foy': 1, 'nicholls': 1, 'mckendry': 1, 'purcell': 1, 'seminary': 1, 'olsen': 1, 'levine': 1, 'greenfield': 1, 'egglesfield': 1, 'krasinski': 1, 'mylod': 1, 'scorses': 1, 'dowse': 1, 'structure': 1, 'morra': 1, 'sarsgaard': 1, 'mullan': 1, 'striped': 1, 'exiled': 1, 'tail': 1, 'valerie': 1, 'racers': 1, 'gluck': 1, 'jenna': 1, 'clarkson': 1, 'fuller': 1, 'dominik': 1, 'jenkins': 1, 'gandolfini': 1, 'biology': 1, 'throwing': 1, 'ayoade': 1, 'rosemarie': 1, 'dewitt': 1, 'evanovich': 1, 'evade': 1, 'brent': 2, 'lorenzo': 1, 'bonaventura': 1, 'sucsy': 1, 'garner': 2, 'tours': 1, 'sienna': 1, 'guillory': 1, 'skarsgard': 1, 'lauter': 1, 'targets': 2, 'blunderbuss': 1, 'midwestern': 1, 'famke': 1, 'janssen': 1, 'julianna': 1, 'guilll': 1, 'felton': 1, 'thandie': 1, 'tarsem': 1, 'singh': 1, 'cavill': 1, 'freida': 1, 'pinto': 1, 'phaedra': 1, 'callahand': 1, 'magnum': 1, 'revolver': 1, 'cinderella': 1, 'pathetic': 1, 'fresh': 2, 'adventrues': 1, 'penguin': 3, 'mumble': 1, 'homeland': 1, 'tenaciously': 1, 'napped': 1, 'hyenas': 1, 'stroker': 1, 'unimaginable': 1, 'nieson': 1, 'stopping': 1, 'snowy': 1, 'centric': 1, 'legions': 1, 'workout': 1, 'narcotic': 1, 'addicted': 2, 'dickey': 1, 'meerkat': 1, 'barrier': 1, 'reef': 1, 'seinfeld': 3, 'fiber': 1, 'druggies': 1, 'sin': 1, 'aide': 1, 'hakkuna': 1, 'mattata': 1, 'emotionless': 1, 'fails': 1, 'casket': 1, 
'biting': 1, 'projectile': 1, 'vomiting': 1, 'rituals': 1, 'awareness': 1, 'fracking': 1, 'unravels': 1, 'folks': 1, 'marquand': 1, 'connect': 1, 'less': 1, 'draped': 1, 'coward': 1, 'interacting': 1, 'separate': 3, 'dedicated': 1, 'coachroach': 1, 'slime': 1, 'interactions': 1, 'cabra': 1, 'chaffrey': 1, 'patton': 2, 'catapulted': 1, 'corddry': 1, 'giffin': 1, 'mccoy': 1, 'waugh': 1, 'alexis': 1, 'knapp': 1, 'fatt': 1, 'vergara': 1, 'attend': 1, 'galaxies': 1, 'moriarty': 1, 'israelites': 1, 'receiving': 1, 'tablets': 1, 'haples': 1, 'womanizer': 1, 'severe': 1, 'platonic': 1, 'lapd': 1, 'juliane': 1, 'separates': 1, 'wing': 1, 'inception': 1, 'ratings': 1, 'hillside': 1, 'ving': 1, 'rhames': 1, 'endure': 1, 'hurdles': 1, 'platoon': 1, 'impressive': 1, 'firearm': 2, 'moeller': 1, 'boles': 2, 'lionel': 1, 'atwill': 1, 'riccardo': 1, 'baroni': 1, 'lassparri': 1, 'federic': 1, 'carven': 1, 'teapot': 1, 'webb': 1, 'tyrannical': 1, 'jade': 1, 'whiz': 1, 'chained': 1, 'bathroom': 2, 'jessical': 1, '22': 1, 'barbera': 1, 'daft': 1, 'reuben': 1, 'heals': 1, 'letterman': 1, 'tilda': 1, 'swinton': 1, 'berkeley': 1, 'breathed': 1, 'crusades': 1, 'saoirse': 1, 'ronan': 1, 'lesster': 1, 'defenders': 1, 'cactus': 1, 'flinn': 1, 'banded': 1, 'supervillain': 1, 'britian': 1, 'cocky': 1, 'birthmark': 1, 'standardized': 1, 'scores': 1, 'asner': 1, 'newhart': 1, 'woodstock': 1, 'helpful': 1, 'biz': 1, 'claymation': 1, 'encampment': 1, 'alda': 1, 'comidy': 1, 'theives': 1, 'copycats': 1, 'illusions': 1, 'awakened': 1, 'lifelike': 1, 'profitable': 1, 'health': 1, 'dungeon': 1, '54': 1, 'resistance': 1, 'bonnet': 1, 'miscreant': 1, 'incapable': 1, 'bilk': 1, 'investors': 1, 'prestige': 1, 'exorcised': 1, 'rouge': 1, 'barbarian': 1, 'mcgowan': 1, 'thurmeier': 1, 'allcott': 1, 'carnahan': 1, 'eyed': 2, 'documented': 1, 'dunns': 1, 'weston': 1, 'shakespheare': 1, 'brannaugh': 1, 'thorton': 1, 'specialist': 1, 'conversations': 1, 'intertwining': 1, 'busey': 1, 'knew': 1, 'tawanda': 1, 
'vehicular': 1, 'paralyzed': 1, 'docks': 1, 'peeta': 1, 'mellark': 1, 'messed': 2, 'carrol': 1, 'lamborgini': 1, 'hijacked': 1, 'seaton': 1, 'keri': 1, 'tractor': 1, 'unpopular': 1, 'reputations': 1, 'jazmine': 1, 'lease': 1, 'letters': 1, 'fascism': 1, 'extreme': 1, 'reactor': 1, 'leveled': 1, 'lilliputuans': 1, '1050': 1, 'kinds': 1, 'mya': 1, 'lil': 1, 'collaborated': 1, 'weakness': 1, 'kryptonite': 1, 'hideously': 1, 'correct': 1, 'administrative': 1, 'carradine': 1, 'envelope': 1, 'concieve': 1, 'ash': 1, 'kooky': 1, 'schwarzeneggar': 1, 'veterinary': 1, 'studies': 1, 'blended': 1, 'galactic': 1, 'armbands': 1, 'typewriter': 1, 'perennial': 1, 'visitations': 1, 'lear': 1, 'rent': 2, 'knock': 1, 'poppins': 1, 'whipping': 1, 'sorcery': 1, 'marcus': 1, 'nispel': 1, 'dementors': 1, 'silverbacks': 1, 'helpers': 1, 'teri': 1, 'wet': 1, 'horrendous': 1, 'acient': 1, 'mention': 1, 'hatchet': 1, 'defraud': 1, 'bending': 1, 'segorney': 1, '39': 1, 'gilbert': 1, 'consciousness': 1, 'dicks': 1, 'lauded': 1, 'yann': 1, 'martel': 1, 'miramax': 1, 'janero': 1, 'amplifier': 1, 'sparkly': 1, 'reforms': 1, 'discuss': 1, 'outsiders': 1, 'absurdities': 1, 'contradictions': 1, 'hackford': 1, 'cadet': 1, 'originate': 1, 'miserly': 1, 'thrift': 1, 'sloppy': 1, 'formulaic': 1, 'flips': 1, 'ax': 1, 'coffee': 1, 'regardless': 1, 'resulting': 1, 'voilent': 1, 'wraith': 1, 'laguna': 1, 'grisly': 1, 'baja': 1, 'tapes': 1, 'olds': 1, 'filling': 1, 'lovably': 1, 'disinherited': 1, 'favreu': 1, 'nineteenth': 1, 'rumplestiltskin': 1, 'dug': 1, 'designed': 1, 'apeocolypse': 1, 'gaming': 1, '2074': 1, 'masters': 1, 'gothem': 1, 'fending': 1, 'blizzards': 1, 'acheron': 1, 'expensive': 1, 'queequeg': 1, 'ishmael': 1, 'shaolin': 1, 'axe': 1, 'chop': 1, 'wifes': 1, 'simulated': 1, 'thaqt': 1, 'blinded': 1, 'courted': 1, 'supper': 1, 'menaced': 1, 'munched': 1, 'july': 1, 'compiled': 1, 'clips': 1, 'ineffective': 1, 'landmark': 1, 'stroke': 1, 'firearms': 1, 'initial': 1, 'menacing': 1, 'casino': 1, 
'matters': 1, 'calling': 1, 'scorpio': 1, 'menaces': 1, 'nails': 1, 'callahan': 1, 'ferret': 1, 'unexpected': 1, 'harrowing': 1, 'fic': 1, 'strands': 1, 'epiphany': 1, 'expressing': 1, 'athlete': 1, 'sabermetrics': 1, 'dehaan': 1, 'iconically': 1, 'gainesville': 1, 'ripper': 1, 'thrice': 1, 'scratched': 1, 'marsellus': 1, 'mccallister': 1, 'labeled': 1, 'tolstoy': 1, 'foray': 1, 'crafted': 1, 'sicily': 1, 'confined': 1, 'chair': 1, 'window': 1, 'martindale': 1, 'uttering': 1, 'praise': 1, 'narnia': 1, 'infiltrate': 1, 'personal': 1, 'idiodic': 1, 'obscene': 1, 'notoriety': 1, 'glows': 1, 'heche': 1, 'lobbyist': 1, 'defection': 1, 'ascending': 1, 'gotten': 1, 'listed': 1, 'allens': 1, 'composers': 1, 'bombs': 1, 'varying': 1, 'landed': 1, 'whos': 1, 'pacifists': 1, 'easily': 1, 'vegetarians': 1, 'smitth': 1, 'nsa': 1, 'goons': 1, 'key': 1, 'motivated': 1, 'mckay': 1, 'dieter': 1, 'cunth': 1, 'heartedly': 1, 'devious': 1, 'northwest': 1, 'koreans': 1, 'prefects': 1, 'bath': 1, 'mull': 1, 'regularly': 1, 'scheduled': 1, 'traveler': 1, 'recreated': 1}\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "d7pIJmZCgRMA"
+ },
+ "source": [
+ "### 1.2.4 Cumulative token frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1mi4Vw5ABFXg",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 529
+ },
+ "outputId": "0e421a91-75a4-4b2d-b5cb-8fb1bd2bac98"
+ },
+ "source": [
+ "# Plot the cumulative distribution of token frequency\n",
+ "def cumulative_token_frequency(series, limit=20):\n",
+ "    '''\n",
+ "    Plot, for every count threshold c in range(limit), the percentage of\n",
+ "    distinct tokens that occur at most c times.\n",
+ "    Input:\n",
+ "        series - pd.Series (or any iterable) of words\n",
+ "        limit - number of count thresholds to plot (0 .. limit-1)\n",
+ "    Output:\n",
+ "        [print] - vocabulary size\n",
+ "        [plot] - cumulative distribution of token frequency\n",
+ "    '''\n",
+ "    counter = Counter(series)\n",
+ "    vocab_total = len(counter)\n",
+ "    print(\"Vocabulary Size: \", vocab_total)\n",
+ "    if vocab_total == 0:\n",
+ "        # Empty input: nothing to plot, and the percentages below would divide by zero\n",
+ "        return\n",
+ "\n",
+ "    # tokens_per_count[c] = number of distinct tokens that occur exactly c times\n",
+ "    tokens_per_count = Counter(counter.values())\n",
+ "    prop_list = []\n",
+ "    tokens_so_far = 0\n",
+ "    for threshold in range(limit):\n",
+ "        # Running total of tokens whose count is <= threshold\n",
+ "        tokens_so_far += tokens_per_count.get(threshold, 0)\n",
+ "        prop_list.append(round(tokens_so_far * 100 / vocab_total, 2))\n",
+ "\n",
+ "    a4_dims = (11.7, 8.27)\n",
+ "    fig, ax = plt.subplots(figsize=a4_dims)\n",
+ "    ax.plot(range(limit), prop_list)\n",
+ "    ax.grid()\n",
+ "    ax.set_xlabel(\"Counts\")\n",
+ "    ax.set_ylabel(\"Proportion of Vocabulary (%)\")\n",
+ "\n",
+ "cumulative_token_frequency(df[\"Word\"])"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Vocabulary Size: 10987\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "KE4VV7X4fUwj"
+ },
+ "source": [
+ "### 1.2.5 Entity Frequency"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "dB_9MvjOAoqq",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 553
+ },
+ "outputId": "4f8faa61-0504-4701-de7f-d825b78dec00"
+ },
+ "source": [
+ "tag_counter = plot_top_non_stopwords_barchart(df[\"Tag\"], top=25, word=False)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "There are 25 distinct tags in dataset\n",
+ "{'B-Actor': 5010, 'I-Actor': 6121, 'O': 55895, 'B-Plot': 6468, 'I-Plot': 62107, 'B-Opinion': 810, 'I-Opinion': 539, 'B-Award': 309, 'I-Award': 719, 'B-Year': 2702, 'B-Genre': 3384, 'B-Origin': 779, 'I-Origin': 3340, 'B-Director': 1787, 'I-Director': 1653, 'I-Genre': 2283, 'I-Year': 195, 'B-Soundtrack': 50, 'I-Soundtrack': 158, 'B-Relationship': 580, 'I-Relationship': 1206, 'B-Character_Name': 1025, 'I-Character_Name': 760, 'B-Quote': 126, 'I-Quote': 817}\n"
+ ],
+ "name": "stdout"
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "YPCRkkm768sx"
+ },
+ "source": [
+ "\n",
+ "## Conclusion after analysis"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "BhDBB8847Dev"
+ },
+ "source": [
+ "### Vocab\n",
+ "1. The vocab size is 10987 (excluding stopwords). This is quite a large dataset.\n",
+ "2. Our dataset is comprised of long sentences, with an average length of 20 and a median length of 19; sentence length ranges from 1 to 71. Only a small number of sentences are longer than 40 => set the max length equal to 40 => need a lot of padding tokens.\n",
+ "3. 45% of the vocabulary only occur once but they could be person's names so let's keep them.\n",
+ "4. Year: can be replaced by a common token (e.g. `YEAR`)\n",
+ "5. Number in text: can be replaced by a common token (e.g. `NUM`)\n",
+ "6. Lemmatization: *films* to *film*\n",
+ "7. All words are in lowercase.\n",
+ "8. No punctuations.\n",
+ "9. No informal text.\n",
+ "10. Lots of abbreviation (like *i'm, i'll, can't, 2morrow*)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fRaEnZIe8UWy"
+ },
+ "source": [
+ "### Tags\n",
+ "1. Most of the tags are short, with average length of 2 words and median length of 4 words. There are some long tags, and the longest tag has 40 words; this could be a plot description.\n",
+ "2. Most sentences are about plots.\n",
+ "3. There are 25 classes of entities, divided into 3 categories: B tags (beginning of an entity), I tags (intermediate part of an entity) and the O tag (no entity). Proportions of B, I, O are about 14.50%, 50.31%, 35.19% respectively (see section 2.7 Check the dataset imbalance).\n",
+ "4. Most of the tags are in the minority, while I-Plot and O are by far the most common tags (see the counts in section 1.2.5) => need to over-sample the tags from the minority groups.\n",
+ "5. As under the section 2.7 Check the dataset imbalance, the percentage of sentences that only contain O tags -> 0.02% => It's small amount so we don't need to delete those sentences.\n",
+ "6. Also under section 2.7 Check the dataset imbalance, the percentage of OOV tokens in test set -> 3.36%"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Dtr2P3ZE3sE6"
+ },
+ "source": [
+ "\n",
+ "# Part 2: Pre-process the data\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "MRofcZfAUd9g"
+ },
+ "source": [
+ "\n",
+ "## 2.1 Stemming"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1J6BsedjUeOf"
+ },
+ "source": [
+ "def stem_sentence(sentence):\n",
+ "    '''\n",
+ "    Input:\n",
+ "        sentence - str, words separated by single spaces\n",
+ "    Output:\n",
+ "        str - the same words, each passed through PorterStemmer, re-joined with spaces\n",
+ "    '''\n",
+ "    porter = PorterStemmer()\n",
+ "    return ' '.join(porter.stem(token) for token in sentence.split(' '))"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "4p6nLSAePE9U"
+ },
+ "source": [
+ "\n",
+ "## 2.2 Lemmatization"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "reh2ym9rO2PM",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "8aeeca0a-9089-4146-db88-b7ba43d9f6b5"
+ },
+ "source": [
+ "nltk.download('punkt')\n",
+ "nltk.download('wordnet')\n",
+ "def lemmatize_sentence(sentence):\n",
+ "    '''\n",
+ "    Input:\n",
+ "        sentence - str\n",
+ "    Output:\n",
+ "        str - tokens produced by nltk.word_tokenize, each lemmatized with\n",
+ "              WordNetLemmatizer and re-joined with single spaces\n",
+ "    '''\n",
+ "    wordnet = WordNetLemmatizer()\n",
+ "    lemmas = []\n",
+ "    for token in nltk.word_tokenize(sentence):\n",
+ "        lemmas.append(wordnet.lemmatize(token))\n",
+ "    return ' '.join(lemmas)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "[nltk_data] Downloading package punkt to /root/nltk_data...\n",
+ "[nltk_data] Unzipping tokenizers/punkt.zip.\n",
+ "[nltk_data] Downloading package wordnet to /root/nltk_data...\n",
+ "[nltk_data] Unzipping corpora/wordnet.zip.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "Hm9UnKBqUeZA"
+ },
+ "source": [
+ "\n",
+ "## 2.3 Replacement"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "EZzhc1zBUeiv"
+ },
+ "source": [
+ "import re\n",
+ "\n",
+ "def replace(sentence, to_replace, replace_by):\n",
+ "    '''Return `sentence` with every occurrence of the substring `to_replace` swapped for `replace_by`.'''\n",
+ "    return sentence.replace(to_replace, replace_by)\n",
+ "\n",
+ "def replace_num(sentence):\n",
+ "    '''\n",
+ "    Replace numeric tokens of a sentence with placeholder tokens.\n",
+ "    Input:\n",
+ "        sentence - str of whitespace-separated tokens\n",
+ "    Output:\n",
+ "        str - every standalone 1-2 digit number becomes NUM and every\n",
+ "              standalone 4-digit number (optionally followed by a lone 's',\n",
+ "              as in '1980 s') becomes YEAR\n",
+ "    '''\n",
+ "    # Tokens are delimited with whitespace look-arounds instead of ^...$, so a\n",
+ "    # number is replaced anywhere in the sentence and not only when the whole\n",
+ "    # sentence consists of that number.\n",
+ "    replaced = re.sub(r'(?<!\\S)\\d{1,2}(?!\\S)', \"NUM\", sentence) # replace 1, 2 digits\n",
+ "    # NOTE(review): '1980 s' -> 'YEAR' merges two tokens into one; confirm the\n",
+ "    # per-token tag sequences stay aligned with the sentence afterwards.\n",
+ "    replaced = re.sub(r'(?<!\\S)\\d{4} s(?!\\S)', \"YEAR\", replaced) # replace decade\n",
+ "    replaced = re.sub(r'(?<!\\S)\\d{4}(?!\\S)', \"YEAR\", replaced) # replace year\n",
+ "    return replaced\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "vfswr-bAXwl5"
+ },
+ "source": [
+ "\n",
+ "## 2.4 Pre-processing pipeline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "uvGcbsdOXw3N"
+ },
+ "source": [
+ "def apply_preproc(data_generator):\n",
+ "    '''\n",
+ "    Run the pre-processing pipeline over a collection of sentences.\n",
+ "    Input:\n",
+ "        data_generator - iterable of sentences (str)\n",
+ "    Output:\n",
+ "        list of pre-processed sentences, in the input order\n",
+ "    '''\n",
+ "    def preprocess(sentence):\n",
+ "        sentence = replace(sentence, \"ca n t\",\"cannot\")\n",
+ "        # Expand the contraction remnant 'll' only when it is a standalone token.\n",
+ "        # A plain substring replace also rewrote the 'll' inside ordinary words\n",
+ "        # ('all' -> 'awill', 'kill' -> 'kiwill').\n",
+ "        sentence = re.sub(r'(?<!\\S)ll(?!\\S)', \"will\", sentence)\n",
+ "        sentence = replace_num(sentence)\n",
+ "        return lemmatize_sentence(sentence)\n",
+ "\n",
+ "    # Single pass: each sentence goes through all steps in the original order\n",
+ "    return [preprocess(sentence) for sentence in data_generator]\n",
+ "\n",
+ "processed_sentences = apply_preproc(sentences)\n",
+ "processed_test_sentences = apply_preproc(test_sentences)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WMK0gaxoOi97"
+ },
+ "source": [
+ "\n",
+ "## 2.5 Split to train/val datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "yjPJLq1I0_CX"
+ },
+ "source": [
+ "split_ratio = 0.8\n",
+ "\n",
+ "def train_val_split(data, label, ratio, shuffle=True, random_seed=33):\n",
+ "    '''\n",
+ "    Split parallel lists of samples and labels into train and validation parts.\n",
+ "    Input:\n",
+ "        data - list of samples\n",
+ "        label - list of labels, aligned with data\n",
+ "        ratio - fraction of the samples assigned to the train part\n",
+ "        shuffle - shuffle the sample order before splitting when True\n",
+ "        random_seed - seed passed to rnd.seed (applied even when shuffle is False)\n",
+ "    Output:\n",
+ "        train_data, train_label, val_data, val_label - lists\n",
+ "    '''\n",
+ "    length = len(data)\n",
+ "    lines_index = [*range(length)]\n",
+ "    # shuffle the indexes if shuffle is set to True\n",
+ "    rnd.seed(random_seed)\n",
+ "    if shuffle:\n",
+ "        rnd.shuffle(lines_index)\n",
+ "\n",
+ "    # Exactly int(length * ratio) samples go to train. The previous\n",
+ "    # `i <= split_point` test moved one extra sample from validation to train.\n",
+ "    split_point = int(length * ratio)\n",
+ "    train_index = lines_index[:split_point]\n",
+ "    val_index = lines_index[split_point:]\n",
+ "\n",
+ "    train_data = [data[i] for i in train_index]\n",
+ "    train_label = [label[i] for i in train_index]\n",
+ "    val_data = [data[i] for i in val_index]\n",
+ "    val_label = [label[i] for i in val_index]\n",
+ "    return train_data, train_label, val_data, val_label\n",
+ "\n",
+ "\n",
+ "train_sentences, train_tags, val_sentences, val_tags = \\\n",
+ " train_val_split(processed_sentences, tags, split_ratio)\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3Sot8kCJltNu"
+ },
+ "source": [
+ "\n",
+ "## 2.6 Tokenization and Padding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "GDfsVDq9KxgV"
+ },
+ "source": [
+ "# Tokenizer / padding settings shared by the sentence and tag pipelines\n",
+ "# NOTE(review): the OOV token is an empty string - confirm this is intended\n",
+ "oov_tok = \"\"\n",
+ "trunc_type = 'post'\n",
+ "pad_type = 'post'\n",
+ "max_length = 71\n",
+ "\n",
+ "# The word vocabulary is fitted on the training sentences only\n",
+ "vocab_tokenizer = Tokenizer(oov_token=oov_tok)\n",
+ "vocab_tokenizer.fit_on_texts(train_sentences)\n",
+ "\n",
+ "vocab = vocab_tokenizer.word_index\n",
+ "reverse_vocab = {index: word for word, index in vocab.items()}\n",
+ "vocab_size = len(vocab)\n",
+ "\n",
+ "# Encode every split as sequences of word indexes\n",
+ "train_sequences, val_sequences, test_sequences = (\n",
+ "    vocab_tokenizer.texts_to_sequences(split)\n",
+ "    for split in (train_sentences, val_sentences, processed_test_sentences)\n",
+ ")\n",
+ "\n",
+ "# Pad / truncate every sequence at the end so that all have max_length entries\n",
+ "train_padded_sequences, val_padded_sequences, test_padded_sequences = (\n",
+ "    pad_sequences(sequences,\n",
+ "                  maxlen=max_length,\n",
+ "                  truncating=trunc_type,\n",
+ "                  padding=pad_type)\n",
+ "    for sequences in (train_sequences, val_sequences, test_sequences)\n",
+ ")"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "CP_DmX44K24X",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2d2cf840-66e6-495e-f3b6-ccd62dcbd3f9"
+ },
+ "source": [
+ "# lower=False keeps the tag casing; '.' is the only character filtered out of the tags\n",
+ "tag_tokenizer = Tokenizer(filters=\".\", lower=False, oov_token=oov_tok)\n",
+ "tag_tokenizer.fit_on_texts(train_tags)\n",
+ "\n",
+ "tag_map = tag_tokenizer.word_index\n",
+ "reverse_tag_map = {index: tag for tag, index in tag_map.items()}\n",
+ "tag_size = len(tag_map)\n",
+ "\n",
+ "# Encode the tag strings of every split as sequences of tag ids\n",
+ "train_tag_sequences, val_tag_sequences, test_tag_sequences = (\n",
+ "    tag_tokenizer.texts_to_sequences(split)\n",
+ "    for split in (train_tags, val_tags, test_tags)\n",
+ ")\n",
+ "\n",
+ "# Same padding / truncation settings as for the sentences\n",
+ "train_padded_tags, val_padded_tags, test_padded_tags = (\n",
+ "    pad_sequences(tag_sequences,\n",
+ "                  maxlen=max_length,\n",
+ "                  truncating=trunc_type,\n",
+ "                  padding=pad_type)\n",
+ "    for tag_sequences in (train_tag_sequences, val_tag_sequences, test_tag_sequences)\n",
+ ")\n",
+ "\n",
+ "print(\"\\nExample of a a sentence and its tokenized, padded version\")\n",
+ "print(train_sentences[0])\n",
+ "print(train_padded_sequences[0])\n",
+ "print(\"\\nExample of a list of tags in a sentence and its tokenized, padded version\")\n",
+ "print(train_tags[0])\n",
+ "print(train_padded_tags[0])\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Example of a a sentence and its tokenized, padded version\n",
+ "what 2011 animated movie feature the voice of seth green joan cusack and dan fogler\n",
+ "[ 4 52 40 7 37 3 200 5 627 628 3033 1735 6 2199\n",
+ " 3760 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0]\n",
+ "\n",
+ "Example of a list of tags in a sentence and its tokenized, padded version\n",
+ "O B-Year B-Genre O O O O O B-Actor I-Actor I-Actor I-Actor O B-Actor I-Actor\n",
+ "[3 9 7 3 3 3 3 3 6 5 5 5 3 6 5 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0\n",
+ " 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "VmNBER6vh8fZ"
+ },
+ "source": [
+ "\n",
+ "## 2.7 Check the Imbalance in train/test dataset"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "1JkHts8fDElj",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "21199e96-faf4-4ff5-b376-683f6f91db13"
+ },
+ "source": [
+ "#Percentage of B, I and O Tags in train dataset\n",
+ "def get_tag_proportion(series_tags):\n",
+ "    '''\n",
+ "    Input:\n",
+ "        series_tags - pd.Series of tags\n",
+ "    Output:\n",
+ "        [print] - share of tags starting with B, starting with I, and all\n",
+ "                  remaining tags (reported as O)\n",
+ "    '''\n",
+ "    beg, inter, out, total = 0, 0, 0, 0\n",
+ "    for tag in series_tags:\n",
+ "        total += 1\n",
+ "        if tag.startswith(\"B\"):\n",
+ "            beg += 1\n",
+ "        elif tag.startswith(\"I\"):\n",
+ "            inter += 1\n",
+ "        else:\n",
+ "            out += 1\n",
+ "    print(\"B tags proportion = {0:.2%}\".format(round(beg/total,4)))\n",
+ "    print(\"I tags proportion = {0:.2%}\".format(round(inter/total,4)))\n",
+ "    print(\"O tags proportion = {0:.2%}\".format(round(out/total,4)))\n",
+ "\n",
+ "get_tag_proportion(df[\"Tag\"])\n",
+ "\n",
+ "# Percentage of sentences that only contain O tags\n",
+ "# If this percentage > 50% => the dataset is imbalanced => drop empty sentences\n",
+ "def get_empty_tag_sentence_proportion(list_tag_sequence, o_tag_id=2):\n",
+ "    '''\n",
+ "    Input:\n",
+ "        list_tag_sequence - list of tag-id sequences in train/test set\n",
+ "        o_tag_id - integer id that the tag tokenizer assigned to the O tag.\n",
+ "                   The default (2) only preserves the old hard-coded value;\n",
+ "                   pass tag_map[\"O\"], since the example printed in section 2.6\n",
+ "                   shows O encoded as 3.\n",
+ "    Output:\n",
+ "        [print] - Percentage of sentences that only contain O tags\n",
+ "    '''\n",
+ "    count = 0\n",
+ "    for seq in list_tag_sequence:\n",
+ "        # Compare every id: the old sum(seq) == 2 * len(seq) test could be\n",
+ "        # satisfied by a mix of other ids and counted empty sequences too\n",
+ "        if len(seq) > 0 and all(tag_id == o_tag_id for tag_id in seq):\n",
+ "            count += 1\n",
+ "\n",
+ "    total = len(list_tag_sequence)\n",
+ "    share = count / total if total else 0  # no sentences -> report 0% instead of dividing by zero\n",
+ "    print(\"\\nPercentage of sentences that only contain O tags -> {0:.2%}\".format(\n",
+ "        round(share, 4)))\n",
+ "\n",
+ "get_empty_tag_sentence_proportion(train_tag_sequences, o_tag_id=tag_map[\"O\"])\n",
+ "\n",
+ "def get_OOV_density(list_token_sequence, oov_index=1):\n",
+ "    '''\n",
+ "    Input:\n",
+ "        list_token_sequence - list of token sequences in test set\n",
+ "        oov_index - integer id counted as the OOV token (defaults to 1, the\n",
+ "                    id this function has always counted)\n",
+ "    Output:\n",
+ "        [print] - Percentage of OOV token in the test set\n",
+ "    '''\n",
+ "    tokens = [token for seq in list_token_sequence for token in seq]\n",
+ "    # Count directly: looking the OOV id up in a dict of counts raised a\n",
+ "    # KeyError whenever the sequences contained no OOV token at all\n",
+ "    oov_count = sum(1 for token in tokens if token == oov_index)\n",
+ "    density = oov_count / len(tokens) if tokens else 0  # no tokens -> 0% instead of dividing by zero\n",
+ "    print(\"\\nPercentage of OOV tokens in test set -> {0:.2%}\".format(\n",
+ "        round(density, 4)))\n",
+ "\n",
+ "get_OOV_density(test_sequences)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "B tags proportion = 14.50%\n",
+ "I tags proportion = 50.31%\n",
+ "O tags proportion = 35.19%\n",
+ "\n",
+ "Percentage of sentences that only contain O tags -> 0.02%\n",
+ "\n",
+ "Percentage of OOV tokens in test set -> 3.36%\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "0OZsbuw2iDAp"
+ },
+ "source": [
+ "\n",
+ "## 2.8 One-hot encoding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "qdsnDiIZwGaC"
+ },
+ "source": [
+ "# One-hot encoding of the padded tag ids, using tag_size + 1 classes\n",
+ "# NOTE(review): the *_padded_tags names are overwritten in place, so this cell\n",
+ "# is not meant to be run twice without re-running section 2.6 first.\n",
+ "train_padded_tags, val_padded_tags, test_padded_tags = (\n",
+ "    np.array([to_categorical(row, num_classes=tag_size + 1) for row in padded_split])\n",
+ "    for padded_split in (train_padded_tags, val_padded_tags, test_padded_tags)\n",
+ ")\n",
+ "\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "84RwGepu37jO"
+ },
+ "source": [
+ "\n",
+ "# Part 3: Building the model\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "yX9PMBUtuJmJ"
+ },
+ "source": [
+ "\n",
+ "## 3.1 Glove Embedding"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3QAO1_GehxQ1",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "e91e9cf9-a8b1-42d6-b836-5ec6a2a9ae74"
+ },
+ "source": [
+ "!mkdir -p /glove_embedding\n",
+ "# Download data over HTTPS with certificate verification enabled.\n",
+ "# -nc: skip the download when the archive is already present.\n",
+ "!wget -nc \\\n",
+ "https://nlp.stanford.edu/data/glove.6B.zip -O /glove_embedding/glove.6B.zip\n",
+ "# -n: never overwrite extracted files, so a re-run does not stop at unzip's overwrite prompt\n",
+ "!unzip -n /glove_embedding/glove.6B.zip -d /glove_embedding\n",
+ "\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "--2021-06-02 17:10:13-- http://nlp.stanford.edu/data/glove.6B.zip\n",
+ "Resolving nlp.stanford.edu (nlp.stanford.edu)... 171.64.67.140\n",
+ "Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:80... connected.\n",
+ "HTTP request sent, awaiting response... 302 Found\n",
+ "Location: https://nlp.stanford.edu/data/glove.6B.zip [following]\n",
+ "--2021-06-02 17:10:13-- https://nlp.stanford.edu/data/glove.6B.zip\n",
+ "Connecting to nlp.stanford.edu (nlp.stanford.edu)|171.64.67.140|:443... connected.\n",
+ "HTTP request sent, awaiting response... 301 Moved Permanently\n",
+ "Location: http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip [following]\n",
+ "--2021-06-02 17:10:13-- http://downloads.cs.stanford.edu/nlp/data/glove.6B.zip\n",
+ "Resolving downloads.cs.stanford.edu (downloads.cs.stanford.edu)... 171.64.64.22\n",
+ "Connecting to downloads.cs.stanford.edu (downloads.cs.stanford.edu)|171.64.64.22|:80... connected.\n",
+ "HTTP request sent, awaiting response... 200 OK\n",
+ "Length: 862182613 (822M) [application/zip]\n",
+ "Saving to: ‘/glove_embedding/glove.6B.zip’\n",
+ "\n",
+ "/glove_embedding/gl 100%[===================>] 822.24M 5.08MB/s in 2m 40s \n",
+ "\n",
+ "2021-06-02 17:12:53 (5.15 MB/s) - ‘/glove_embedding/glove.6B.zip’ saved [862182613/862182613]\n",
+ "\n",
+ "Archive: /glove_embedding/glove.6B.zip\n",
+ " inflating: /glove_embedding/glove.6B.50d.txt \n",
+ " inflating: /glove_embedding/glove.6B.100d.txt \n",
+ " inflating: /glove_embedding/glove.6B.200d.txt \n",
+ " inflating: /glove_embedding/glove.6B.300d.txt \n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "u2SIlF-NeR_o",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "2b6137ac-2988-4387-dc70-41fb7b13f552"
+ },
+ "source": [
+ "GLOVE_DIR = \"/glove_embedding\"\n",
+ "embedding_dim = 300\n",
+ "hits = 0\n",
+ "misses = 0\n",
+ "embeddings_index = {}\n",
+ "\n",
+ "# Each line is parsed as \"<word> <coef_1> ... <coef_n>\". The file name follows\n",
+ "# embedding_dim so the two cannot drift apart, and the encoding is explicit\n",
+ "# because open() otherwise falls back to the platform's default encoding.\n",
+ "glove_path = os.path.join(GLOVE_DIR, 'glove.6B.%dd.txt' % embedding_dim)\n",
+ "with open(glove_path, encoding='utf-8') as f:\n",
+ "    for line in f:\n",
+ "        values = line.split()\n",
+ "        word = values[0]\n",
+ "        coefs = np.asarray(values[1:], dtype='float32')\n",
+ "        embeddings_index[word] = coefs\n",
+ "\n",
+ "print('Found %s word vectors.' % len(embeddings_index))\n",
+ "\n",
+ "# Row i holds the vector of the word with index i. Rows of words not found in\n",
+ "# the embedding index (and row 0, which no word uses) stay all-zeros.\n",
+ "embedding_matrix = np.zeros((len(vocab) + 1, embedding_dim))\n",
+ "for word, i in vocab.items():\n",
+ "    embedding_vector = embeddings_index.get(word)\n",
+ "    if embedding_vector is not None:\n",
+ "        embedding_matrix[i] = embedding_vector\n",
+ "        hits += 1\n",
+ "    else:\n",
+ "        misses += 1\n",
+ "print(\"Converted %d words (%d misses)\" % (hits, misses))\n",
+ "\n",
+ " "
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Found 400000 word vectors.\n",
+ "Converted 8448 words (664 misses)\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "3h-sT21kjV2X"
+ },
+ "source": [
+ "\n",
+ "## 3.2 Define the model "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "tshH4jK03oWM",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "f6018b1a-5900-48f7-e5c0-5d21647288a9"
+ },
+ "source": [
+ "!pip install tensorflow_addons\n",
+ "import tensorflow_addons as tfa\n",
+ "\n",
+ "\n",
+ "# Model architecture\n",
+ "batch_size = 32\n",
+ "embedding_dim = 300\n",
+ "max_length = 71\n",
+ "\n",
+ "def BiLSTM(vocab_size=vocab_size, tag_size=tag_size, hidden_size = 32,\n",
+ "           embedding_dim=embedding_dim):\n",
+ "    '''\n",
+ "    Build and compile the BiLSTM tagger.\n",
+ "    Input:\n",
+ "        vocab_size - size of the word vocabulary (the embedding gets vocab_size+1 rows)\n",
+ "        tag_size - number of tags (the output layer gets tag_size+1 units)\n",
+ "        hidden_size - units of the LSTM and of the intermediate Dense layer\n",
+ "        embedding_dim - width of the embedding vectors\n",
+ "    Output:\n",
+ "        compiled keras Model taking sequences of max_length token ids\n",
+ "    '''\n",
+ "    token_ids = Input(shape=(max_length,))\n",
+ "\n",
+ "    # Frozen embedding initialised from the module-level embedding_matrix\n",
+ "    # NOTE(review): mask_zero=False, so padded positions are presumably included\n",
+ "    # in the loss and in the reported accuracy - confirm this is intended.\n",
+ "    embedded = Embedding(input_dim=vocab_size + 1,\n",
+ "                         output_dim=embedding_dim,\n",
+ "                         input_length=max_length,\n",
+ "                         embeddings_initializer=Constant(embedding_matrix),\n",
+ "                         trainable=False,\n",
+ "                         mask_zero=False)(token_ids)\n",
+ "\n",
+ "    recurrent = LSTM(units=hidden_size, return_sequences=True,\n",
+ "                     recurrent_dropout=0.1)\n",
+ "    encoded = Bidirectional(recurrent)(embedded)\n",
+ "\n",
+ "    projected = TimeDistributed(Dense(hidden_size, activation=\"relu\"))(encoded)\n",
+ "    outputs = Dense(tag_size + 1, activation='softmax')(projected)\n",
+ "    # Disabled CRF alternative:\n",
+ "    #crf = tfa.layers.CRF(tag_size+1)\n",
+ "    #outputs = crf(projected), compiled with loss=crf.loss_function, metrics=[crf.accuracy]\n",
+ "\n",
+ "    tagger = Model(inputs=token_ids, outputs=outputs)\n",
+ "    tagger.compile(optimizer=\"RMSprop\",\n",
+ "                   loss=tf.keras.losses.categorical_crossentropy,\n",
+ "                   metrics=['accuracy'])\n",
+ "    return tagger\n",
+ "\n",
+ "\n",
+ "model = BiLSTM(vocab_size=vocab_size, tag_size=tag_size, hidden_size = 32, \\\n",
+ " embedding_dim=embedding_dim)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: tensorflow_addons in /usr/local/lib/python3.7/dist-packages (0.13.0)\n",
+ "Requirement already satisfied: typeguard>=2.7 in /usr/local/lib/python3.7/dist-packages (from tensorflow_addons) (2.7.1)\n",
+ "WARNING:tensorflow:Layer lstm_3 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n",
+ "WARNING:tensorflow:Layer lstm_3 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n",
+ "WARNING:tensorflow:Layer lstm_3 will not use cuDNN kernels since it doesn't meet the criteria. It will use a generic GPU kernel as fallback when running on GPU.\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sXWo98J3jgLQ"
+ },
+ "source": [
+ "\n",
+ "## 3.3 Callbacks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "d2hDC-w7DGvu"
+ },
+ "source": [
+ "# Callback\n",
+ "class myCallback(tf.keras.callbacks.Callback):\n",
+ "    '''Stop training as soon as the validation accuracy exceeds 95%.'''\n",
+ "    def on_epoch_end(self, epoch, logs=None):\n",
+ "        # logs defaults to None instead of a shared mutable dict, and a missing\n",
+ "        # 'val_accuracy' entry is skipped instead of comparing None > 0.95\n",
+ "        val_accuracy = (logs or {}).get('val_accuracy')\n",
+ "        if val_accuracy is not None and val_accuracy > 0.95:\n",
+ "            print(\"\\nReached 95% accuracy so cancelling training!\")\n",
+ "            self.model.stop_training = True\n",
+ "\n",
+ "checkpointer = ModelCheckpoint(filepath = 'NER_BiLSTM.h5',\n",
+ " verbose = 0,\n",
+ " mode = 'auto',\n",
+ " save_best_only = True,\n",
+ " monitor='val_loss')\n",
+ "\n",
+ "earlystopper = EarlyStopping(monitor='val_loss', min_delta=0, patience=3, \n",
+ " verbose=0, mode='auto', \n",
+ " baseline=None, restore_best_weights=True)\n",
+ "\n",
+ "# Settings of the time-based learning-rate decay\n",
+ "initial_learning_rate = 0.001\n",
+ "epochs = 15\n",
+ "decay = initial_learning_rate / epochs\n",
+ "\n",
+ "def lr_time_based_decay(epoch, lr):\n",
+ "    '''Return the incoming learning rate lr divided by (1 + decay * epoch).'''\n",
+ "    shrink = 1 + decay * epoch\n",
+ "    return lr / shrink\n",
+ "\n",
+ "lr_scheduler = tf.keras.callbacks.LearningRateScheduler(lr_time_based_decay, verbose=1)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "gKnkkiSS4EwI"
+ },
+ "source": [
+ "\n",
+ "# Part 4: Train the Model \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "vQ7uIYmgX3SC",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "faa0e4c9-2a76-4ada-eab4-8f2f85460b38"
+ },
+ "source": [
+ "num_epochs = 15\n",
+ "history = model.fit(train_padded_sequences, train_padded_tags, \n",
+ " batch_size=batch_size, epochs=num_epochs, \n",
+ " validation_data= (val_padded_sequences, val_padded_tags),\n",
+ " callbacks=[checkpointer, earlystopper, lr_scheduler])\n",
+ "\n",
+ "model.summary()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Epoch 1/15\n",
+ "\n",
+ "Epoch 00001: LearningRateScheduler reducing learning rate to 0.0010000000474974513.\n",
+ "196/196 [==============================] - 88s 428ms/step - loss: 0.4487 - accuracy: 0.9060 - val_loss: 0.2207 - val_accuracy: 0.9352\n",
+ "Epoch 2/15\n",
+ "\n",
+ "Epoch 00002: LearningRateScheduler reducing learning rate to 0.0009999333852717665.\n",
+ "196/196 [==============================] - 81s 415ms/step - loss: 0.1669 - accuracy: 0.9501 - val_loss: 0.1606 - val_accuracy: 0.9510\n",
+ "Epoch 3/15\n",
+ "\n",
+ "Epoch 00003: LearningRateScheduler reducing learning rate to 0.0009998000348467315.\n",
+ "196/196 [==============================] - 81s 414ms/step - loss: 0.1343 - accuracy: 0.9582 - val_loss: 0.1579 - val_accuracy: 0.9514\n",
+ "Epoch 4/15\n",
+ "\n",
+ "Epoch 00004: LearningRateScheduler reducing learning rate to 0.0009996001259493627.\n",
+ "196/196 [==============================] - 80s 410ms/step - loss: 0.1163 - accuracy: 0.9629 - val_loss: 0.1405 - val_accuracy: 0.9560\n",
+ "Epoch 5/15\n",
+ "\n",
+ "Epoch 00005: LearningRateScheduler reducing learning rate to 0.0009993336718878056.\n",
+ "196/196 [==============================] - 81s 413ms/step - loss: 0.1035 - accuracy: 0.9662 - val_loss: 0.1327 - val_accuracy: 0.9579\n",
+ "Epoch 6/15\n",
+ "\n",
+ "Epoch 00006: LearningRateScheduler reducing learning rate to 0.0009990006859666584.\n",
+ "196/196 [==============================] - 81s 414ms/step - loss: 0.0936 - accuracy: 0.9692 - val_loss: 0.1316 - val_accuracy: 0.9579\n",
+ "Epoch 7/15\n",
+ "\n",
+ "Epoch 00007: LearningRateScheduler reducing learning rate to 0.0009986012978557466.\n",
+ "196/196 [==============================] - 80s 410ms/step - loss: 0.0856 - accuracy: 0.9716 - val_loss: 0.1377 - val_accuracy: 0.9581\n",
+ "Epoch 8/15\n",
+ "\n",
+ "Epoch 00008: LearningRateScheduler reducing learning rate to 0.0009981355208293137.\n",
+ "196/196 [==============================] - 81s 414ms/step - loss: 0.0780 - accuracy: 0.9744 - val_loss: 0.1378 - val_accuracy: 0.9582\n",
+ "Epoch 9/15\n",
+ "\n",
+ "Epoch 00009: LearningRateScheduler reducing learning rate to 0.0009976034845113318.\n",
+ "196/196 [==============================] - 81s 415ms/step - loss: 0.0709 - accuracy: 0.9762 - val_loss: 0.1359 - val_accuracy: 0.9582\n",
+ "Model: \"model_3\"\n",
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "input_4 (InputLayer) [(None, 71)] 0 \n",
+ "_________________________________________________________________\n",
+ "embedding_3 (Embedding) (None, 71, 300) 2733900 \n",
+ "_________________________________________________________________\n",
+ "bidirectional_3 (Bidirection (None, 71, 64) 85248 \n",
+ "_________________________________________________________________\n",
+ "time_distributed_3 (TimeDist (None, 71, 32) 2080 \n",
+ "_________________________________________________________________\n",
+ "dense_7 (Dense) (None, 71, 27) 891 \n",
+ "=================================================================\n",
+ "Total params: 2,822,119\n",
+ "Trainable params: 2,822,119\n",
+ "Non-trainable params: 0\n",
+ "_________________________________________________________________\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "M3vHe1vaC-fG",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "3b226742-9c0d-4451-e357-e8816e877ab9"
+ },
+ "source": [
+ "# Show which metric series were recorded in the training history.\n",
+ "history.history.keys()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ "dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy', 'lr'])"
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 59
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "K-LuVEjYZ1qg",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 517
+ },
+ "outputId": "74ec1ae3-0c31-486f-804c-4037a65ca371"
+ },
+ "source": [
+ "# Pull the per-epoch curves out of the training history.\n",
+ "# `epochs`, `loss` and `val_loss` are reused by the loss plot in the next cell.\n",
+ "acc = history.history['accuracy']\n",
+ "val_acc = history.history['val_accuracy']\n",
+ "loss = history.history['loss']\n",
+ "val_loss = history.history['val_loss']\n",
+ "epochs = range(1, len(acc) + 1)\n",
+ "\n",
+ "# NOTE(review): 'wo' / 'w' draw in white, which is only visible on a dark figure\n",
+ "# background -- confirm the plotting style configured earlier in the notebook.\n",
+ "plt.figure(figsize=(8, 8))\n",
+ "plt.plot(epochs, acc, 'wo', label='Training acc')\n",
+ "plt.plot(epochs, val_acc, 'w', label='Validation acc')\n",
+ "plt.title('Training and validation accuracy')\n",
+ "plt.xlabel('Epoch')\n",
+ "plt.ylabel('Accuracy')\n",
+ "plt.legend()\n",
+ "# Render the figure explicitly (as the loss cell does) instead of echoing the Legend repr.\n",
+ "plt.show()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "execute_result",
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ },
+ "execution_count": 60
+ },
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "_orawE5havtv",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 499
+ },
+ "outputId": "0a67400a-eaea-4a26-823d-2a3b71e80816"
+ },
+ "source": [
+ "# Loss curves for the same run; `epochs`, `loss` and `val_loss` come from the accuracy cell.\n",
+ "fig, ax = plt.subplots(figsize=(8, 8))\n",
+ "ax.plot(epochs, loss, 'wo', label='Training loss')\n",
+ "ax.plot(epochs, val_loss, 'w', label='Validation loss')\n",
+ "ax.set_title('Training and validation loss')\n",
+ "ax.legend()\n",
+ "plt.show()"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "display_data",
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "tags": []
+ }
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "-ygvHcHU4Nkj"
+ },
+ "source": [
+ "\n",
+ "# Part 5: Test the model\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "SYIAQDjhsHNQ"
+ },
+ "source": [
+ "# The model returns per-token class scores (3D array); arg-max over the last axis\n",
+ "# turns them into a 2D array of tag indices. The one-hot ground truth is decoded the same way.\n",
+ "test_tag_scores = model.predict(test_padded_sequences)\n",
+ "test_padded_tags_true = np.argmax(test_padded_tags, axis=-1)\n",
+ "test_padded_tags_pred = np.argmax(test_tag_scores, axis=-1)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "aS4oaqkBE4XX"
+ },
+ "source": [
+ "def indices_to_tags(index_rows):\n",
+ "    \"\"\"Convert rows of tag indices into rows of tag strings.\n",
+ "\n",
+ "    Index 0 is rendered as \"PAD\"; every other index is looked up in reverse_tag_map.\n",
+ "\n",
+ "    Args:\n",
+ "        index_rows: iterable of rows, each an iterable of integer tag indices.\n",
+ "\n",
+ "    Returns:\n",
+ "        A list with one list of tag strings per input row.\n",
+ "    \"\"\"\n",
+ "    return [\n",
+ "        [\"PAD\" if tag_idx == 0 else reverse_tag_map[tag_idx] for tag_idx in row]\n",
+ "        for row in index_rows\n",
+ "    ]\n",
+ "\n",
+ "\n",
+ "# Convert the index to tag (same conversion for predictions and ground truth)\n",
+ "test_tags_pred = indices_to_tags(test_padded_tags_pred)\n",
+ "test_tags_true = indices_to_tags(test_padded_tags_true)"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "OO5Pg9r2MmV8",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "96156078-c726-4d4d-8f4e-43886bdc7f1c"
+ },
+ "source": [
+ "# seqeval scores entity chunks and warns that \"PAD\" is not an NE tag, so drop the padded\n",
+ "# positions (true tag == \"PAD\") before scoring. A \"PAD\" predicted on a real token is\n",
+ "# still a miss, so it is scored as \"O\".\n",
+ "eval_tags_true, eval_tags_pred = [], []\n",
+ "for true_row, pred_row in zip(test_tags_true, test_tags_pred):\n",
+ "    real_positions = [pos for pos, tag in enumerate(true_row) if tag != \"PAD\"]\n",
+ "    eval_tags_true.append([true_row[pos] for pos in real_positions])\n",
+ "    eval_tags_pred.append([\"O\" if pred_row[pos] == \"PAD\" else pred_row[pos] for pos in real_positions])\n",
+ "\n",
+ "print(\"Micro F1-score is : {:.1%}\".format(f1_score(eval_tags_true, eval_tags_pred)))\n",
+ "print(\"Micro Precision-score is : {:.1%}\".format(precision_score(eval_tags_true, eval_tags_pred)))\n",
+ "print(\"Micro Recall-score is : {:.1%}\".format(recall_score(eval_tags_true, eval_tags_pred)))\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/seqeval/metrics/sequence_labeling.py:171: UserWarning: PAD seems not to be NE tag.\n",
+ " warnings.warn('{} seems not to be NE tag.'.format(chunk))\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ "Micro F1-score is : 69.6%\n",
+ "Micro Precision-score is : 66.4%\n",
+ "Micro Recall-score is : 73.0%\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "znKBJcadMr9l",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "ef4323f4-fa78-45b7-c69f-bb68f54bdfb0"
+ },
+ "source": [
+ "# Token-level precision / recall / F1 for every tag (PAD positions included).\n",
+ "report = flat_classification_report(\n",
+ "    y_true=test_tags_true,\n",
+ "    y_pred=test_tags_pred,\n",
+ ")\n",
+ "print(report)"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "/usr/local/lib/python3.7/dist-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+ " _warn_prf(average, modifier, msg_start, len(result))\n"
+ ],
+ "name": "stderr"
+ },
+ {
+ "output_type": "stream",
+ "text": [
+ " precision recall f1-score support\n",
+ "\n",
+ " B-Actor 0.91 0.95 0.93 1274\n",
+ " B-Award 0.61 0.52 0.56 66\n",
+ "B-Character_Name 0.65 0.48 0.55 283\n",
+ " B-Director 0.87 0.87 0.87 425\n",
+ " B-Genre 0.78 0.83 0.81 789\n",
+ " B-Opinion 0.47 0.53 0.50 195\n",
+ " B-Origin 0.54 0.27 0.36 190\n",
+ " B-Plot 0.55 0.43 0.48 1577\n",
+ " B-Quote 0.00 0.00 0.00 47\n",
+ " B-Relationship 0.80 0.62 0.70 171\n",
+ " B-Soundtrack 0.00 0.00 0.00 8\n",
+ " B-Year 0.94 0.97 0.96 661\n",
+ " I-Actor 0.91 0.95 0.93 1553\n",
+ " I-Award 0.64 0.74 0.69 147\n",
+ "I-Character_Name 0.64 0.41 0.50 227\n",
+ " I-Director 0.91 0.90 0.91 411\n",
+ " I-Genre 0.84 0.62 0.71 544\n",
+ " I-Opinion 0.50 0.02 0.04 143\n",
+ " I-Origin 0.68 0.69 0.69 808\n",
+ " I-Plot 0.90 0.92 0.91 14661\n",
+ " I-Quote 0.60 0.62 0.61 344\n",
+ " I-Relationship 0.59 0.36 0.44 289\n",
+ " I-Soundtrack 0.00 0.00 0.00 30\n",
+ " I-Year 0.71 0.57 0.63 44\n",
+ " O 0.85 0.89 0.86 14143\n",
+ " PAD 1.00 1.00 1.00 99704\n",
+ "\n",
+ " accuracy 0.96 138734\n",
+ " macro avg 0.65 0.58 0.60 138734\n",
+ " weighted avg 0.96 0.96 0.96 138734\n",
+ "\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "HE7Q2oD2M9rL",
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "outputId": "df0d97c9-fa16-4333-876c-915231440dc6"
+ },
+ "source": [
+ "def tag_label(tag_idx):\n",
+ "    \"\"\"Return the tag string for a tag index, rendering index 0 as \"PAD\".\n",
+ "\n",
+ "    The index-to-tag conversion cell special-cases 0 instead of looking it up in\n",
+ "    reverse_tag_map, so the same rule is applied here.\n",
+ "    \"\"\"\n",
+ "    return \"PAD\" if tag_idx == 0 else reverse_tag_map[tag_idx]\n",
+ "\n",
+ "\n",
+ "# Each execution picks a random test sample: a row index in [0, number of test rows).\n",
+ "sample_idx = np.random.randint(0, test_padded_sequences.shape[0])\n",
+ "sample_pred = model.predict(np.array([test_padded_sequences[sample_idx]]))\n",
+ "sample_pred = np.argmax(sample_pred, axis=-1)\n",
+ "sample_true = np.argmax(test_padded_tags[sample_idx], -1)\n",
+ "\n",
+ "print(\"Sample number {} of {} (Test Set)\".format(sample_idx, test_padded_sequences.shape[0]))\n",
+ "# Visualization\n",
+ "print(\"{:20}||{:20}||{}\".format(\"Word\", \"True\", \"Pred\"))\n",
+ "print(60 * \"=\")\n",
+ "for word, tag, pred in zip(test_padded_sequences[sample_idx], sample_true, sample_pred[0]):\n",
+ "    if word != 0:  # word index 0 is skipped (padding)\n",
+ "        print(\"{:20}: {:20} {}\".format(reverse_vocab[word], tag_label(tag), tag_label(pred)))\n",
+ "\n"
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "stream",
+ "text": [
+ "Sample number 1382 of 1954 (Test Set)\n",
+ "Word ||True ||Pred\n",
+ "============================================================\n",
+ "a : O O\n",
+ "animated : B-Genre B-Genre\n",
+ "movie : O O\n",
+ "about : O O\n",
+ "a : B-Plot B-Plot\n",
+ "father : I-Plot I-Plot\n",
+ "clown : I-Plot I-Plot\n",
+ "fish : I-Plot I-Plot\n",
+ "that : I-Plot I-Plot\n",
+ "ha : I-Plot I-Plot\n",
+ "lost : I-Plot I-Plot\n",
+ "his : I-Plot I-Plot\n",
+ "son : I-Plot I-Plot\n",
+ "in : I-Plot I-Plot\n",
+ "the : I-Plot I-Plot\n",
+ "deep : I-Plot I-Plot\n",
+ "blue : I-Plot I-Plot\n",
+ "ocean : I-Plot I-Plot\n",
+ "and : I-Plot I-Plot\n",
+ "wiwill : I-Plot I-Plot\n",
+ "stop : I-Plot I-Plot\n",
+ "at : I-Plot I-Plot\n",
+ "nothing : I-Plot I-Plot\n",
+ "to : I-Plot I-Plot\n",
+ "find : I-Plot I-Plot\n",
+ "him : I-Plot I-Plot\n"
+ ],
+ "name": "stdout"
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "sNtJLetx4Sb_"
+ },
+ "source": [
+ "\n",
+ "# Part 6: Test with your own sentence"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "V0niUrOnVskX",
+ "colab": {
+ "base_uri": "https://localhost:8080/",
+ "height": 758
+ },
+ "outputId": "77c0aa7b-7836-4909-edda-72903c0e893f"
+ },
+ "source": [
+ "# Tag free-text sentences with the trained model.\n",
+ "original_to_test = [\"is michael scofield the protagonist in prison break\",\n",
+ "                    \"what is the highest rated romantic movie in all time\"]\n",
+ "\n",
+ "to_test = apply_preproc(original_to_test)\n",
+ "\n",
+ "# Do NOT call vocab_tokenizer.fit_on_texts() here: it registers unseen words under new\n",
+ "# indices beyond the vocabulary the embedding layer was built with, and model.predict then\n",
+ "# fails with \"indices[...] is not in [0, vocab_size)\".\n",
+ "# NOTE(review): words unknown to the tokenizer are dropped by texts_to_sequences unless it\n",
+ "# was created with an oov_token, which would shift the word/prediction pairing below --\n",
+ "# confirm how vocab_tokenizer was constructed.\n",
+ "to_test = vocab_tokenizer.texts_to_sequences(to_test)\n",
+ "\n",
+ "to_test = pad_sequences(to_test,\n",
+ "                        maxlen=max_length,\n",
+ "                        truncating=trunc_type,\n",
+ "                        padding=pad_type)\n",
+ "\n",
+ "to_test_tag_pred = model.predict(to_test)\n",
+ "to_test_tag_pred = np.argmax(to_test_tag_pred, axis=-1)\n",
+ "\n",
+ "for sentence, row in zip(original_to_test, to_test_tag_pred):\n",
+ "    words = sentence.split(' ')  # split once per sentence, not once per token\n",
+ "    print(\"\\n{:20}||{}\".format(\"Word\", \"Pred\"))\n",
+ "    print(40 * \"=\")\n",
+ "    # NOTE(review): pairing the j-th word with the j-th prediction assumes the real tokens\n",
+ "    # come first in the padded row (post-padding) -- confirm pad_type.\n",
+ "    for word, pred in zip(words, row):\n",
+ "        if pred != 0:  # predicted index 0 is not printed\n",
+ "            print(\"{:20}: {}\".format(word, reverse_tag_map[pred]))\n",
+ " "
+ ],
+ "execution_count": null,
+ "outputs": [
+ {
+ "output_type": "error",
+ "ename": "InvalidArgumentError",
+ "evalue": "ignored",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m padding=pad_type)\n\u001b[1;32m 15\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0mto_test_tag_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mmodel\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_test\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0mto_test_tag_pred\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0margmax\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mto_test_tag_pred\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0maxis\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/keras/engine/training.py\u001b[0m in \u001b[0;36mpredict\u001b[0;34m(self, x, batch_size, verbose, steps, callbacks, max_queue_size, workers, use_multiprocessing)\u001b[0m\n\u001b[1;32m 1725\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mstep\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msteps\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1726\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_predict_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstep\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1727\u001b[0;31m \u001b[0mtmp_batch_outputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mpredict_function\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1728\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mdata_handler\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mshould_sync\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1729\u001b[0m \u001b[0mcontext\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0masync_wait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 887\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 888\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mOptionalXlaContext\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_jit_compile\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 889\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 890\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 891\u001b[0m \u001b[0mnew_tracing_count\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mexperimental_get_tracing_count\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/def_function.py\u001b[0m in \u001b[0;36m_call\u001b[0;34m(self, *args, **kwds)\u001b[0m\n\u001b[1;32m 922\u001b[0m \u001b[0;31m# In this case we have not created variables on the first call. So we can\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 923\u001b[0m \u001b[0;31m# run the first trace but we should fail if variables are created.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 924\u001b[0;31m \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_stateful_fn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 925\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_created_variables\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 926\u001b[0m raise ValueError(\"Creating variables on a non-first call to a function\"\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 3022\u001b[0m filtered_flat_args) = self._maybe_define_function(args, kwargs)\n\u001b[1;32m 3023\u001b[0m return graph_function._call_flat(\n\u001b[0;32m-> 3024\u001b[0;31m filtered_flat_args, captured_inputs=graph_function.captured_inputs) # pylint: disable=protected-access\n\u001b[0m\u001b[1;32m 3025\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3026\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[0;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[1;32m 1959\u001b[0m \u001b[0;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1960\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[0;32m-> 1961\u001b[0;31m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[1;32m 1962\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[1;32m 1963\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/function.py\u001b[0m in \u001b[0;36mcall\u001b[0;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[1;32m 594\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 595\u001b[0m \u001b[0mattrs\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mattrs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 596\u001b[0;31m ctx=ctx)\n\u001b[0m\u001b[1;32m 597\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 598\u001b[0m outputs = execute.execute_with_cancellation(\n",
+ "\u001b[0;32m/usr/local/lib/python3.7/dist-packages/tensorflow/python/eager/execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[0;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mensure_initialized\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m tensors = pywrap_tfe.TFE_Py_Execute(ctx._handle, device_name, op_name,\n\u001b[0;32m---> 60\u001b[0;31m inputs, attrs, num_outputs)\n\u001b[0m\u001b[1;32m 61\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mcore\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_NotOkStatusException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 62\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mname\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+ "\u001b[0;31mInvalidArgumentError\u001b[0m: 2 root error(s) found.\n (0) Invalid argument: indices[0,2] = 9113 is not in [0, 9113)\n\t [[node model/embedding/embedding_lookup (defined at :16) ]]\n (1) Invalid argument: indices[0,2] = 9113 is not in [0, 9113)\n\t [[node model/embedding/embedding_lookup (defined at :16) ]]\n\t [[model/embedding/embedding_lookup/_21]]\n0 successful operations.\n0 derived errors ignored. [Op:__inference_predict_function_14054]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node model/embedding/embedding_lookup:\n model/embedding/embedding_lookup/13534 (defined at /usr/lib/python3.7/contextlib.py:112)\n\nInput Source operations connected to node model/embedding/embedding_lookup:\n model/embedding/embedding_lookup/13534 (defined at /usr/lib/python3.7/contextlib.py:112)\n\nFunction call stack:\npredict_function -> predict_function\n"
+ ]
+ }
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "WZJApvzvaJcy"
+ },
+ "source": [
+ "\n",
+ "# Part 7: Analyse the incorrect predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "fb8q_rD4aNRH"
+ },
+ "source": [
+ "def get_incorrect(y_pred, y_true, X_test):\n",
+ "    \"\"\"Count the tokens and true tags at positions where the prediction is wrong.\n",
+ "\n",
+ "    Args:\n",
+ "        y_pred: array of predicted tag indices.\n",
+ "        y_true: array of true tag indices, same shape as y_pred.\n",
+ "        X_test: array of token indices, same shape as y_pred.\n",
+ "\n",
+ "    Returns:\n",
+ "        (incorrect_tokens, incorrect_tags): two dicts mapping a token index /\n",
+ "        true tag index to the number of mispredicted positions it occurs at.\n",
+ "    \"\"\"\n",
+ "    y_pred, y_true, X_test = np.asarray(y_pred), np.asarray(y_true), np.asarray(X_test)\n",
+ "    # Element-wise mask of mispredicted positions. The previous version discarded the\n",
+ "    # result of .flatten() and indexed whole rows, so every token of a sentence with\n",
+ "    # a single error was counted as incorrect.\n",
+ "    where_incorrect = y_true != y_pred\n",
+ "    incorrect_tokens = dict(Counter(X_test[where_incorrect].tolist()))\n",
+ "    incorrect_tags = dict(Counter(y_true[where_incorrect].tolist()))\n",
+ "    return incorrect_tokens, incorrect_tags\n",
+ "\n",
+ "\n",
+ "# Count the mispredicted tokens / true tags on the test set, then rank them by frequency.\n",
+ "token_counts, tag_counts = get_incorrect(test_padded_tags_pred,\n",
+ "                                         test_padded_tags_true,\n",
+ "                                         test_padded_sequences)\n",
+ "\n",
+ "incorrect_tokens = sorted(token_counts.items(), key=lambda item: item[1], reverse=True)\n",
+ "incorrect_tags = sorted(tag_counts.items(), key=lambda item: item[1], reverse=True)\n",
+ "\n",
+ "# One table per ranking: (header format, column title, ranked pairs, index -> name lookup).\n",
+ "ranking_tables = [\n",
+ "    (\"{:^20}||{:^15}\", \"Incorrect word\", incorrect_tokens, reverse_vocab),\n",
+ "    (\"\\n{:^20}||{:^15}\", \"Incorrect tag\", incorrect_tags, reverse_tag_map),\n",
+ "]\n",
+ "for header_format, column_title, ranked_pairs, index_to_name in ranking_tables:\n",
+ "    print(header_format.format(column_title, \"Frequency\"))\n",
+ "    print(37 * \"=\")\n",
+ "    # Top 20 entries; index 0 is skipped.\n",
+ "    for idx, count in ranked_pairs[:20]:\n",
+ "        if idx == 0:\n",
+ "            continue\n",
+ "        print(\"{:20}: {:15}\".format(index_to_name[idx], count))\n"
+ ],
+ "execution_count": null,
+ "outputs": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "5rOw_59Ztzy7"
+ },
+ "source": [
+ "## Conclusion after analysis "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NiAN2Zy--j-_"
+ },
+ "source": [
+ "1. A lot of the incorrect words are stopwords, due to their occurrence in the plot. This is the problem of token label consistency.\n",
+ "2. The values of macro-average are much lower than micro-average of Precision, Recall and F1-score, as a result of imbalanced classes.\n",
+ "3. The number of incorrect Plot entities is so high because Plot entities are the longest ones.\n",
+ "4. There are lots of abbreviations that need to be cleaned. For example: country names (US -> u, s) or person names (J.K.Rowling -> j, k, rowling)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "N2J2LDiBRkyq"
+ },
+ "source": [
+ "## Potential improvements"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "9aBUF7SIHVhd"
+ },
+ "source": [
+ "1. To tackle the imbalanced classes problem, use over-sampling to gain more examples of tags from the minority groups.\n",
+ "2. To tackle label inconsistency, there're 3 solutions:\n",
+ "\n",
+ "    * Use larger context. For example, use longer sentences, or combine 2 or more sentences that have similar/correlated meaning.\n",
+ " * Use CRF decoder layer.\n",
+ " * Use Character/Subword-level encoders like ELMO, Flair, CNN and BERT. \n",
+ "\n",
+ "3. To recognize long entities better:\n",
+ "    * Incorporate POS tagging besides BIO tagging. \n",
+ " * Use ensemble to combine multiple models.\n",
+ "\n",
+ "\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "NkJ8Go7BVVgz"
+ },
+ "source": [
+ "\n",
+ "# Export result to .tsv file"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "metadata": {
+ "id": "3EQxLZgzRdL9"
+ },
+ "source": [
+ "# Write the predictions to a tab-separated file: one [tag, word] row per token and an\n",
+ "# empty row after each sentence.\n",
+ "# newline='' lets the csv module control line endings itself, as its documentation requires.\n",
+ "with open('/tmp/pred.tsv', 'wt', newline='') as out_file:\n",
+ "    tsv_writer = csv.writer(out_file, delimiter='\\t')\n",
+ "    for pred_row, word_row in zip(test_padded_tags_pred, test_padded_sequences):\n",
+ "        for pred, word in zip(pred_row, word_row):\n",
+ "            # Skip positions whose predicted tag index or word index is 0.\n",
+ "            if pred != 0 and word != 0:\n",
+ "                tsv_writer.writerow([reverse_tag_map[pred], reverse_vocab[word]])\n",
+ "        tsv_writer.writerow([])"
+ ],
+ "execution_count": null,
+ "outputs": []
+ }
+ ]
+}
\ No newline at end of file