diff --git a/hw0/boriskin/hw0.ipynb b/hw0/boriskin/hw0.ipynb
new file mode 100644
index 0000000..e42cba7
--- /dev/null
+++ b/hw0/boriskin/hw0.ipynb
@@ -0,0 +1,1760 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m"
+ ]
+ },
+ "execution_count": 1,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`org.apache.spark::spark-sql:2.4.3`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql._"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.log4j.{Level, Logger}\n",
+ "Logger.getLogger(\"org\").setLevel(Level.OFF)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading spark-stubs\n",
+ "Creating SparkSession\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Spark UI"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@6731a7cf"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val spark = {\n",
+ " NotebookSparkSession.builder()\n",
+ " .master(\"local[*]\")\n",
+ " .getOrCreate()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def sc = spark.sparkContext"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd5.sc:1"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val rdd = sc.parallelize(1 to 100000000, 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
+ "\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import spark.implicits._\n",
+ "\n",
+ "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ " csv at cmd7.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
data: DataFrame = [id: string, name: string ... 14 more fields]
\n",
+ "
"
+ ],
+ "text/plain": [
+ "\u001b[36mdata\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, name: string ... 14 more fields]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "var data = spark.read.option(\"header\", \"true\").option(\"mode\", \"DROPMALFORMED\").option(\"escape\", \"\\\"\").csv(\"AB_NYC_2019.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "implicit class RichDF(val df: DataFrame) {\n",
+ " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n",
+ " import xml.Utility.escape\n",
+ " val data = df.take(limit)\n",
+ " val header = df.schema.fieldNames.toSeq\n",
+ " val rows: Seq[Seq[String]] = data.map { row =>\n",
+ " row.toSeq.map { cell =>\n",
+ " val str = cell match {\n",
+ " case null => \"null\"\n",
+ " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
+ " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
+ " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
+ " case _ => cell.toString\n",
+ " }\n",
+ " if (truncate > 0 && str.length > truncate) {\n",
+ " // do not show ellipses for strings shorter than 4 characters.\n",
+ " if (truncate < 4) str.substring(0, truncate)\n",
+ " else str.substring(0, truncate - 3) + \"...\"\n",
+ " } else {\n",
+ " str\n",
+ " }\n",
+ " }: Seq[String]\n",
+ " }\n",
+ "\n",
+ " publish.html(s\"\"\"\n",
+ " \n",
+ " \n",
+ " ${header.map(h => s\"| ${escape(h)} | \").mkString}\n",
+ "
\n",
+ " ${rows.map { row =>\n",
+ " s\"${row.map { c => s\"| ${escape(c)} | \" }.mkString}
\"\n",
+ " }.mkString\n",
+ " }\n",
+ "
\"\"\")\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {
+ "scrolled": false
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | \n",
+ "
\n",
+ " | 2539 | Clean & quiet apt... | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 2018-10-19 | 0.21 | 6 | 365 |
| 2595 | Skylit Midtown Ca... | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 2019-05-21 | 0.38 | 2 | 355 |
| 3647 | THE VILLAGE OF HA... | 4632 | Elisabeth | Manhattan | Harlem | 40.80902 | -73.9419 | Private room | 150 | 3 | 0 | null | null | 1 | 365 |
| 3831 | Cozy Entire Floor... | 4869 | LisaRoxanne | Brooklyn | Clinton Hill | 40.68514 | -73.95976 | Entire home/apt | 89 | 1 | 270 | 2019-07-05 | 4.64 | 1 | 194 |
| 5022 | Entire Apt: Spaci... | 7192 | Laura | Manhattan | East Harlem | 40.79851 | -73.94399 | Entire home/apt | 80 | 10 | 9 | 2018-11-19 | 0.10 | 1 | 0 |
| 5099 | Large Cozy 1 BR A... | 7322 | Chris | Manhattan | Murray Hill | 40.74767 | -73.975 | Entire home/apt | 200 | 3 | 74 | 2019-06-22 | 0.59 | 1 | 129 |
| 5121 | BlissArtsSpace! | 7356 | Garon | Brooklyn | Bedford-Stuyvesant | 40.68688 | -73.95596 | Private room | 60 | 45 | 49 | 2017-10-05 | 0.40 | 1 | 0 |
| 5178 | Large Furnished R... | 8967 | Shunichi | Manhattan | Hell's Kitchen | 40.76489 | -73.98493 | Private room | 79 | 2 | 430 | 2019-06-24 | 3.47 | 1 | 220 |
| 5203 | Cozy Clean Guest ... | 7490 | MaryEllen | Manhattan | Upper West Side | 40.80178 | -73.96723 | Private room | 79 | 2 | 118 | 2017-07-21 | 0.99 | 1 | 0 |
| 5238 | Cute & Cozy Lower... | 7549 | Ben | Manhattan | Chinatown | 40.71344 | -73.99037 | Entire home/apt | 150 | 1 | 160 | 2019-06-09 | 1.33 | 4 | 188 |
| 5295 | Beautiful 1br on ... | 7702 | Lena | Manhattan | Upper West Side | 40.80316 | -73.96545 | Entire home/apt | 135 | 5 | 53 | 2019-06-22 | 0.43 | 1 | 6 |
| 5441 | Central Manhattan... | 7989 | Kate | Manhattan | Hell's Kitchen | 40.76076 | -73.98867 | Private room | 85 | 2 | 188 | 2019-06-23 | 1.50 | 1 | 39 |
| 5803 | Lovely Room 1, Ga... | 9744 | Laurie | Brooklyn | South Slope | 40.66829 | -73.98779 | Private room | 89 | 4 | 167 | 2019-06-24 | 1.34 | 3 | 314 |
| 6021 | Wonderful Guest B... | 11528 | Claudio | Manhattan | Upper West Side | 40.79826 | -73.96113 | Private room | 85 | 2 | 113 | 2019-07-05 | 0.91 | 1 | 333 |
| 6090 | West Village Nest... | 11975 | Alina | Manhattan | West Village | 40.7353 | -74.00525 | Entire home/apt | 120 | 90 | 27 | 2018-10-31 | 0.22 | 1 | 0 |
| 6848 | Only 2 stops to M... | 15991 | Allen & Irina | Brooklyn | Williamsburg | 40.70837 | -73.95352 | Entire home/apt | 140 | 2 | 148 | 2019-06-29 | 1.20 | 1 | 46 |
| 7097 | Perfect for Your ... | 17571 | Jane | Brooklyn | Fort Greene | 40.69169 | -73.97185 | Entire home/apt | 215 | 2 | 198 | 2019-06-28 | 1.72 | 1 | 321 |
| 7322 | Chelsea Perfect | 18946 | Doti | Manhattan | Chelsea | 40.74192 | -73.99501 | Private room | 140 | 1 | 260 | 2019-07-01 | 2.12 | 1 | 12 |
| 7726 | Hip Historic Brow... | 20950 | Adam And Charity | Brooklyn | Crown Heights | 40.67592 | -73.94694 | Entire home/apt | 99 | 3 | 53 | 2019-06-22 | 4.44 | 1 | 21 |
| 7750 | Huge 2 BR Upper E... | 17985 | Sing | Manhattan | East Harlem | 40.79685 | -73.94872 | Entire home/apt | 190 | 7 | 0 | null | null | 2 | 249 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "data.toDF.showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}\u001b[39m"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql.expressions.Window\n",
+ "import org.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | \n",
+ "
\n",
+ " | 2539 | Clean & quiet apt... | 2787 | John | Brooklyn | Kensington | 40.64749 | -73.97237 | Private room | 149 | 1 | 9 | 2018-10-19 | 0.21 | 6 | 365 |
| 2595 | Skylit Midtown Ca... | 2845 | Jennifer | Manhattan | Midtown | 40.75362 | -73.98377 | Entire home/apt | 225 | 1 | 45 | 2019-05-21 | 0.38 | 2 | 355 |
| 3831 | Cozy Entire Floor... | 4869 | LisaRoxanne | Brooklyn | Clinton Hill | 40.68514 | -73.95976 | Entire home/apt | 89 | 1 | 270 | 2019-07-05 | 4.64 | 1 | 194 |
| 5022 | Entire Apt: Spaci... | 7192 | Laura | Manhattan | East Harlem | 40.79851 | -73.94399 | Entire home/apt | 80 | 10 | 9 | 2018-11-19 | 0.10 | 1 | 0 |
| 5099 | Large Cozy 1 BR A... | 7322 | Chris | Manhattan | Murray Hill | 40.74767 | -73.975 | Entire home/apt | 200 | 3 | 74 | 2019-06-22 | 0.59 | 1 | 129 |
| 5121 | BlissArtsSpace! | 7356 | Garon | Brooklyn | Bedford-Stuyvesant | 40.68688 | -73.95596 | Private room | 60 | 45 | 49 | 2017-10-05 | 0.40 | 1 | 0 |
| 5178 | Large Furnished R... | 8967 | Shunichi | Manhattan | Hell's Kitchen | 40.76489 | -73.98493 | Private room | 79 | 2 | 430 | 2019-06-24 | 3.47 | 1 | 220 |
| 5203 | Cozy Clean Guest ... | 7490 | MaryEllen | Manhattan | Upper West Side | 40.80178 | -73.96723 | Private room | 79 | 2 | 118 | 2017-07-21 | 0.99 | 1 | 0 |
| 5238 | Cute & Cozy Lower... | 7549 | Ben | Manhattan | Chinatown | 40.71344 | -73.99037 | Entire home/apt | 150 | 1 | 160 | 2019-06-09 | 1.33 | 4 | 188 |
| 5295 | Beautiful 1br on ... | 7702 | Lena | Manhattan | Upper West Side | 40.80316 | -73.96545 | Entire home/apt | 135 | 5 | 53 | 2019-06-22 | 0.43 | 1 | 6 |
| 5441 | Central Manhattan... | 7989 | Kate | Manhattan | Hell's Kitchen | 40.76076 | -73.98867 | Private room | 85 | 2 | 188 | 2019-06-23 | 1.50 | 1 | 39 |
| 5803 | Lovely Room 1, Ga... | 9744 | Laurie | Brooklyn | South Slope | 40.66829 | -73.98779 | Private room | 89 | 4 | 167 | 2019-06-24 | 1.34 | 3 | 314 |
| 6021 | Wonderful Guest B... | 11528 | Claudio | Manhattan | Upper West Side | 40.79826 | -73.96113 | Private room | 85 | 2 | 113 | 2019-07-05 | 0.91 | 1 | 333 |
| 6090 | West Village Nest... | 11975 | Alina | Manhattan | West Village | 40.7353 | -74.00525 | Entire home/apt | 120 | 90 | 27 | 2018-10-31 | 0.22 | 1 | 0 |
| 6848 | Only 2 stops to M... | 15991 | Allen & Irina | Brooklyn | Williamsburg | 40.70837 | -73.95352 | Entire home/apt | 140 | 2 | 148 | 2019-06-29 | 1.20 | 1 | 46 |
| 7097 | Perfect for Your ... | 17571 | Jane | Brooklyn | Fort Greene | 40.69169 | -73.97185 | Entire home/apt | 215 | 2 | 198 | 2019-06-28 | 1.72 | 1 | 321 |
| 7322 | Chelsea Perfect | 18946 | Doti | Manhattan | Chelsea | 40.74192 | -73.99501 | Private room | 140 | 1 | 260 | 2019-07-01 | 2.12 | 1 | 12 |
| 7726 | Hip Historic Brow... | 20950 | Adam And Charity | Brooklyn | Crown Heights | 40.67592 | -73.94694 | Entire home/apt | 99 | 3 | 53 | 2019-06-22 | 4.44 | 1 | 21 |
| 7801 | Sweet and Spaciou... | 21207 | Chaya | Brooklyn | Williamsburg | 40.71842 | -73.95718 | Entire home/apt | 299 | 3 | 9 | 2011-12-28 | 0.07 | 1 | 0 |
| 8024 | CBG CtyBGd HelpsH... | 22486 | Lisel | Brooklyn | Park Slope | 40.68069 | -73.97706 | Private room | 130 | 2 | 130 | 2019-07-01 | 1.09 | 6 | 347 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36msqlContext\u001b[39m: \u001b[32mSQLContext\u001b[39m = org.apache.spark.sql.SQLContext@7d933b13"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "// Фильтр по ячейкам, где есть Null-значения\n",
+ "\n",
+ "val sqlContext = spark.sqlContext\n",
+ "\n",
+ "sqlContext.createDataFrame(data.rdd.filter(x=> Range(0, x.length).count(x.isNullAt(_)) < 1 ), data.schema).showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "// Фильтр на предложения, где цена нулевая и где минимальное количество ночей больше 365:\n",
+ "\n",
+ "data = data.where(col(\"price\") > 0)\n",
+ "\n",
+ "data = data.where(col(\"minimum_nights\") < 366)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Медиана:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 4 / 4\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 20 / 20\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 25 / 25\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | room_type | Медиана | \n",
+ "
\n",
+ " | 51 | 1.0 |
| 205 | 4.0 |
| 54 | 7.0 |
| 200 | 1.0 |
| 279 | 2.0 |
| 138 | 2.0 |
| 69 | 2.0 |
| 42 | 5.0 |
| Shared room | 45.0 |
| 425 | 1.0 |
| 59 | 2.0 |
| 250 | 2.0 |
| 160 | 1.0 |
| 85 | 2.0 |
| 35 | 1.0 |
| 410 | 31.0 |
| 298 | 3.0 |
| 188 | 1.0 |
| 99 | 1.0 |
| 110 | 3.0 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Мода:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 200 / 200\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 4 / 4\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 20 / 20\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 25 / 25\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | room_type | price | \n",
+ "
\n",
+ " | 51 | 1 |
| 205 | 4 |
| 54 | 7 |
| 200 | 1 |
| 279 | 2 |
| 138 | 2 |
| 69 | 2 |
| 42 | 5 |
| Shared room | 35 |
| 425 | 1 |
| 59 | 2 |
| 250 | 2 |
| 160 | 1 |
| 85 | 2 |
| 35 | 5 |
| 410 | 31 |
| 188 | 1 |
| 298 | 3 |
| 99 | 1 |
| 110 | 3 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Среднее:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 4 / 4\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 20 / 20\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 25 / 25\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | room_type | Среднее | \n",
+ "
\n",
+ " | 51 | 1.0 |
| 205 | 4.0 |
| 54 | 7.0 |
| 200 | 1.0 |
| 279 | 2.0 |
| 138 | 2.0 |
| 69 | 1.7142857142857142 |
| 42 | 5.0 |
| Shared room | 70.21991341991342 |
| 425 | 1.0 |
| 59 | 7.8 |
| 250 | 2.0 |
| 160 | 2.0 |
| 85 | 2.0 |
| 35 | 3.0 |
| 410 | 31.0 |
| 298 | 3.0 |
| 188 | 1.0 |
| 99 | 2.0 |
| 110 | 3.0 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Дисперсия:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 4 / 4\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 20 / 20\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 25 / 25\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | room_type | var_samp(price) | \n",
+ "
\n",
+ " | 51 | 0.0 |
| 205 | NaN |
| 54 | NaN |
| 200 | NaN |
| 279 | NaN |
| 138 | NaN |
| 69 | 0.23809523809523805 |
| 42 | NaN |
| Shared room | 10382.929065850383 |
| 425 | NaN |
| 59 | 154.2 |
| 250 | 0.0 |
| 160 | 2.0 |
| 85 | NaN |
| 35 | 8.0 |
| 410 | NaN |
| 298 | NaN |
| 188 | NaN |
| 99 | 1.3333333333333333 |
| 110 | NaN |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "// Посчитать медиану, моду и среднее, и дисперсию для каждого room_type\n",
+ "\n",
+ "println(\"Медиана:\")\n",
+ "\n",
+ "data.groupBy(\"room_type\").agg(callUDF(\"percentile_approx\", col(\"price\"), lit(0.5)).as(\"Медиана\")).toDF.showHTML()\n",
+ "\n",
+ "println(\"\")\n",
+ "println(\"\")\n",
+ "println(\"Мода:\")\n",
+ "\n",
+ "data.groupBy(\"room_type\", \"price\").count().withColumn(\"row_number\", row_number().over(Window.partitionBy(\"room_type\").orderBy(desc(\"count\")))).select(\"room_type\", \"price\").where(col(\"row_number\") === 1).toDF.showHTML()\n",
+ "\n",
+ "println(\"\")\n",
+ "println(\"\")\n",
+ "println(\"Среднее:\")\n",
+ "\n",
+ "data.groupBy(\"room_type\").agg(avg(\"price\").as(\"Среднее\")).toDF.showHTML()\n",
+ "\n",
+ "println(\"\")\n",
+ "println(\"\")\n",
+ "println(\"Дисперсия:\")\n",
+ "\n",
+ "data.select(\"room_type\", \"price\").groupBy(\"room_type\").agg(variance(\"price\")).as(\"Дисперсия\").toDF.showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Самое дешевое:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | \n",
+ "
\n",
+ " | 24114389 | Very Spacious bed... | 180661875 | Salim | Manhattan | Upper West Side | 40.76844 | -73.98333 | Private room | 10 | 1 | 2 | 2018-04-23 | 0.13 | 1 | 0 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Самое дорогое:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | name | host_id | host_name | neighbourhood_group | neighbourhood | latitude | longitude | room_type | price | minimum_nights | number_of_reviews | last_review | reviews_per_month | calculated_host_listings_count | availability_365 | \n",
+ "
\n",
+ " | 31340283 | 2br - The Heart o... | 4382127 | Matt | Manhattan | Lower East Side | 40.7198 | -73.98566 | Entire home/apt | 9999 | 30 | 0 | null | null | 1 | 365 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "// Найти самое дорогое и самое дешевое предложение\n",
+ "\n",
+ "println(\"Самое дешевое:\")\n",
+ "\n",
+ "data.orderBy(\"price\").showHTML(1)\n",
+ "\n",
+ "println(\"\")\n",
+ "println(\"\")\n",
+ "println(\"Самое дорогое:\")\n",
+ "\n",
+ "data.orderBy(desc(\"price\")).showHTML(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Корреляцию между ценой и минимальным количеством ночей:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | corr(price, minimum_nights) | \n",
+ "
\n",
+ " | 0.049635888865656445 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "\n",
+ "Корреляцию между ценой и количеством отзывов:\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | corr(price, number_of_reviews) | \n",
+ "
\n",
+ " | -0.04804569425619054 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "// Посчитать корреляцию между ценой и минимальным количеством ночей, кол-вом отзывов\n",
+ "\n",
+ "println(\"Корреляцию между ценой и минимальным количеством ночей:\")\n",
+ "\n",
+ "data.agg(corr(\"price\", \"minimum_nights\")).toDF.showHTML()\n",
+ "\n",
+ "println(\"\")\n",
+ "println(\"\")\n",
+ "println(\"Корреляцию между ценой и количеством отзывов:\")\n",
+ "\n",
+ "data.agg(corr(\"price\", \"number_of_reviews\")).toDF.showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mencodeGeoHash\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mInt\u001b[39m) => \u001b[32mString\u001b[39m = ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "// Нужно найти гео квадрат размером 5км на 5км с самой высокой средней стоимостью жилья\n",
+ "\n",
+ "val encodeGeoHash = (latitude: Double, longitude: Double, precision: Int) => {\n",
+ " val base32 = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n",
+ " var (minLatitude, maxLatitude) = (-90.0, 90.0)\n",
+ " var (minLongitude, maxLongitude) = (-180.0, 180.0)\n",
+ " val bits = List(16, 8, 4, 2, 1)\n",
+ "\n",
+ " (0 until precision).map { p => {\n",
+ " base32 apply (0 until 5).map { i => {\n",
+ " if (((5 * p) + i) % 2 == 0) {\n",
+ " val mid = (minLongitude + maxLongitude) / 2.0\n",
+ " if (longitude > mid) {\n",
+ " minLongitude = mid\n",
+ " bits(i)\n",
+ " } else {\n",
+ " maxLongitude = mid\n",
+ " 0\n",
+ " }\n",
+ " } else {\n",
+ " val mid = (minLatitude + maxLatitude) / 2.0\n",
+ " if (latitude > mid) {\n",
+ " minLatitude = mid\n",
+ " bits(i)\n",
+ " } else {\n",
+ " maxLatitude = mid\n",
+ " 0\n",
+ " }\n",
+ " }\n",
+ " }\n",
+ " }.reduceLeft((a, b) => a | b)\n",
+ " }\n",
+ " }.mkString(\"\")\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mgeoHash_udf\u001b[39m: \u001b[32mexpressions\u001b[39m.\u001b[32mUserDefinedFunction\u001b[39m = \u001b[33mUserDefinedFunction\u001b[39m(\n",
+ " ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344,\n",
+ " StringType,\n",
+ " \u001b[33mSome\u001b[39m(\u001b[33mList\u001b[39m(DoubleType, DoubleType, IntegerType))\n",
+ ")"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val geoHash_udf = udf(encodeGeoHash)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd8.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 200 / 200\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | geoHash | Средняя цена | \n",
+ "
\n",
+ " | dr5wf | 350.0 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mgeoHash\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [geoHash: string, Средняя цена: double]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val geoHash = data.withColumn(\"geoHash\", geoHash_udf(col(\"latitude\"), col(\"longitude\"), lit(5))).groupBy(\"geoHash\").agg(avg(\"price\").as(\"Средняя цена\")).orderBy(desc(\"Средняя цена\"))\n",
+ "\n",
+ "geoHash.toDF.showHTML(1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mobject\u001b[39m \u001b[36mDecoder\u001b[39m"
+ ]
+ },
+ "execution_count": 31,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "// check source https://github.com/lifulong/geohash\n",
+ "\n",
+ "object Decoder {\n",
+ " val BASE32\t\t\t = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n",
+ " val MIN_LATITUDE :Double\t= -90\n",
+ " val MAX_LATITUDE :Double\t= 90\n",
+ " val MIN_LONGITUDE :Double\t= -180\n",
+ " val MAX_LONGITUDE :Double\t= 180\n",
+ "\n",
+ " val bits = Array[Byte](16, 8, 4, 2, 1)\n",
+ " val base32 = BASE32.toCharArray\n",
+ "\n",
+ " implicit def int2byte(int: Int) = {\n",
+ " int.toByte\n",
+ " }\n",
+ "\n",
+ " def DecodeBounds(geohash: String): (Double, Double, Double, Double) = {\n",
+ "\n",
+ " def toBitList(str: String) = str.flatMap {\n",
+ " char => (\"00000\" + base32.indexOf(char).toBinaryString ).\n",
+ " reverse.take(5).reverse.map('1' == ) } toList\n",
+ "\n",
+ " def split(list: List[Boolean]): (List[Boolean], List[Boolean]) = {\n",
+ "\n",
+ " list match{\n",
+ " case Nil => (Nil,Nil)\n",
+ " case x::Nil => (x::Nil,Nil)\n",
+ " case x::y::zs => val (xs,ys) = split(zs); (x::xs,y::ys)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " def dehash(xs: List[Boolean], min: Double, max: Double): (Double,Double) = {\n",
+ "\n",
+ " ((min,max) /: xs ) {\n",
+ " case ((min,max), bool) =>\n",
+ " if(bool) ((min + max)/2, max)\n",
+ " else (min, (min + max)/2)\n",
+ " }\n",
+ " }\n",
+ "\n",
+ " val (xs, ys) = split(toBitList(geohash))\n",
+ " val (minLat, maxLat) = dehash(ys, MIN_LATITUDE, MAX_LATITUDE)\n",
+ " val (minLng, maxLng) = dehash(xs, MIN_LONGITUDE, MAX_LONGITUDE)\n",
+ "\n",
+ " (minLat, maxLat, minLng, maxLng)\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at cmd32.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 2 / 2\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at cmd32.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 200 / 200\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at cmd32.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 200 / 200\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at cmd32.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 61 / 61\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mcoordinates\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m) = (\n",
+ " \u001b[32m40.5615234375\u001b[39m,\n",
+ " \u001b[32m40.60546875\u001b[39m,\n",
+ " \u001b[32m-73.740234375\u001b[39m,\n",
+ " \u001b[32m-73.6962890625\u001b[39m\n",
+ ")"
+ ]
+ },
+ "execution_count": 33,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val coordinates = Decoder.DecodeBounds(geoHash.select(\"geoHash\").collect().map(_(0)).head.toString())"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Scala",
+ "language": "scala",
+ "name": "scala"
+ },
+ "language_info": {
+ "codemirror_mode": "text/x-scala",
+ "file_extension": ".sc",
+ "mimetype": "text/x-scala",
+ "name": "scala",
+ "nbconvert_exporter": "script",
+ "version": "2.12.11"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/hw1/boriskin/hw1.ipynb b/hw1/boriskin/hw1.ipynb
new file mode 100644
index 0000000..9375973
--- /dev/null
+++ b/hw1/boriskin/hw1.ipynb
@@ -0,0 +1,12306 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`org.apache.spark::spark-sql:2.4.0`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql._"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.log4j.{Level, Logger}\n",
+ "Logger.getLogger(\"org\").setLevel(Level.OFF)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading spark-stubs\n",
+ "Creating SparkSession\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Spark UI"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@18bdf22f"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`sh.almond::almond-spark:0.6.0`\n",
+ "\n",
+ "val spark = {\n",
+ " NotebookSparkSession.builder()\n",
+ " .master(\"local[*]\")\n",
+ " .getOrCreate()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def sc = spark.sparkContext"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd8.sc:1"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val rdd = sc.parallelize(1 to 100000000, 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
+ "\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import spark.implicits._\n",
+ "\n",
+ "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " load at cmd10.sc:6\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " load at cmd10.sc:15\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mtrainData\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, keyword: string ... 3 more fields]\n",
+ "\u001b[36mtestData\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, keyword: string ... 2 more fields]"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var trainData = spark.sqlContext.read\n",
+ " .format(\"com.databricks.spark.csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .option(\"mode\", \"DROPMALFORMED\")\n",
+ " .option(\"escape\", \"\\\"\")\n",
+ " .load(\"train.csv\")\n",
+ " .filter($\"text\".isNotNull)\n",
+ " .filter($\"target\".isNotNull)\n",
+ "\n",
+ "var testData = spark.sqlContext.read\n",
+ " .format(\"com.databricks.spark.csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .option(\"mode\", \"DROPMALFORMED\")\n",
+ " .option(\"escape\", \"\\\"\")\n",
+ " .load(\"test.csv\")\n",
+ " .filter($\"text\".isNotNull)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mres11_0\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m, \u001b[32m\"target\"\u001b[39m)\n",
+ "\u001b[36mres11_1\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m)"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trainData.columns\n",
+ "testData.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "implicit class RichDF(val df: DataFrame) {\n",
+ " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n",
+ " import xml.Utility.escape\n",
+ " val data = df.take(limit)\n",
+ " val header = df.schema.fieldNames.toSeq\n",
+ " val rows: Seq[Seq[String]] = data.map { row =>\n",
+ " row.toSeq.map { cell =>\n",
+ " val str = cell match {\n",
+ " case null => \"null\"\n",
+ " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n",
+ " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n",
+ " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n",
+ " case _ => cell.toString\n",
+ " }\n",
+ " if (truncate > 0 && str.length > truncate) {\n",
+ " // do not show ellipses for strings shorter than 4 characters.\n",
+ " if (truncate < 4) str.substring(0, truncate)\n",
+ " else str.substring(0, truncate - 3) + \"...\"\n",
+ " } else {\n",
+ " str\n",
+ " }\n",
+ " }: Seq[String]\n",
+ " }\n",
+ "\n",
+ " publish.html(s\"\"\"\n",
+ " \n",
+ " \n",
+ " ${header.map(h => s\"| ${escape(h)} | \").mkString}\n",
+ "
\n",
+ " ${rows.map { row =>\n",
+ " s\"${row.map { c => s\"| ${escape(c)} | \" }.mkString}
\"\n",
+ " }.mkString\n",
+ " }\n",
+ "
\"\"\")\n",
+ " }\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd12.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | keyword | location | text | target | \n",
+ "
\n",
+ " | 1 | null | null | Our Deeds are the... | 1 |
| 4 | null | null | Forest fire near ... | 1 |
| 5 | null | null | All residents ask... | 1 |
| 6 | null | null | 13,000 people rec... | 1 |
| 7 | null | null | Just got sent thi... | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainData.showHTML(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd12.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | keyword | location | text | \n",
+ "
\n",
+ " | 0 | null | null | Just happened a t... |
| 2 | null | null | Heard about #eart... |
| 3 | null | null | there is a forest... |
| 9 | null | null | Apocalypse lighti... |
| 11 | null | null | Typhoon Soudelor ... |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "testData.showHTML(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd15.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd15.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | summary | id | keyword | location | text | target | \n",
+ "
\n",
+ " | count | 7176 | 7115 | 4771 | 7176 | 7176 |
| mean | 5434.829152731327 | null | 8412.318181818182 | null | 0.42934782608695654 |
| stddev | 3142.3697249082106 | null | 24663.97600393154 | null | 0.49501759344035795 |
| min | 1 | ablaze | | ! Residents Retur... | 0 |
| max | 9998 | wrecked | åø\\_(?)_/åø | åÈMGN-AFRICAå¨ pi... | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "trainData.describe().showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd16.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd16.sc:1\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | summary | id | keyword | location | text | \n",
+ "
\n",
+ " | count | 3261 | 3235 | 2156 | 3261 |
| mean | 5425.076050291322 | null | 98593.33333333333 | null |
| stddev | 3146.111609080526 | null | 225255.45173927903 | null |
| min | 0 | ablaze | | ! Sex-themed e-bo... |
| max | 9999 | wrecked | å©hicago | Û÷SexistÛª peer... |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "testData.describe().showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd17.sc:9\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd17.sc:9\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd17.sc:11\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd17.sc:11\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36mvegas._\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36mvegas.data.External._\n",
+ "\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "// only Scala 2.11\n",
+ "import $ivy.`org.vegas-viz::vegas:0.3.11`\n",
+ "\n",
+ "NotebookSparkSession.sync()\n",
+ "\n",
+ "import vegas._\n",
+ "import vegas.data.External._\n",
+ "\n",
+ "Vegas(\"There are more tweets with target 0 (no disaster) than target 1 (disaster tweets)\").\n",
+ " withData(Seq(\n",
+ " Map(\"value\" -> \"0\", \"count\" ->\n",
+ " trainData.filter(\"target = 0\").describe().select(\"target\").first.getString(0).toInt),\n",
+ " Map(\"value\" -> \"1\", \"count\" ->\n",
+ " trainData.filter(\"target = 1\").describe().select(\"target\").first.getString(0).toInt)\n",
+ " )).\n",
+ " encodeX(\"value\", Ordinal).\n",
+ " encodeY(\"count\", Quantitative).\n",
+ " mark(Bar).\n",
+ " show"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd12.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | keyword | location | text | target | \n",
+ "
\n",
+ " | 1 | null | null | our deeds are the... | 1 |
| 4 | null | null | forest fire near ... | 1 |
| 5 | null | null | all residents ask... | 1 |
| 6 | null | null | 13 000 people rec... | 1 |
| 7 | null | null | just got sent thi... | 1 |
| 8 | null | null | rockyfire update... | 1 |
| 10 | null | null | flood disaster ... | 1 |
| 13 | null | null | i am on top of th... | 1 |
| 14 | null | null | there is an emerg... | 1 |
| 15 | null | null | i am afraid that ... | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mcleanTrainData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 3 more fields]"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql.functions._\n",
+ "\n",
+ "var cleanTrainData = trainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n",
+ "cleanTrainData = cleanTrainData.withColumn(\"text\", lower(col(\"text\")))\n",
+ "\n",
+ "cleanTrainData.showHTML(10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.Pipeline\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineStage\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.classification.GBTClassifier\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.HashingTF\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.IDF\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.RegexTokenizer\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StopWordsRemover\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StringIndexer\u001b[39m"
+ ]
+ },
+ "execution_count": 20,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`org.apache.spark::spark-mllib:2.4.3`\n",
+ "\n",
+ "NotebookSparkSession.sync()\n",
+ "\n",
+ "import org.apache.spark.ml.Pipeline\n",
+ "import org.apache.spark.ml.PipelineStage\n",
+ "import org.apache.spark.ml.classification.GBTClassifier\n",
+ "import org.apache.spark.ml.feature.HashingTF\n",
+ "import org.apache.spark.ml.feature.IDF\n",
+ "import org.apache.spark.ml.feature.RegexTokenizer\n",
+ "import org.apache.spark.ml.feature.StopWordsRemover\n",
+ "import org.apache.spark.ml.feature.StringIndexer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.mllib.feature.Stemmer\u001b[39m"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`com.github.master:spark-stemming_2.10:0.2.1`\n",
+ "\n",
+ "NotebookSparkSession.sync()\n",
+ "\n",
+ "import org.apache.spark.mllib.feature.Stemmer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mregexTokenizer\u001b[39m: \u001b[32mRegexTokenizer\u001b[39m = regexTok_226e407d419f\n",
+ "\u001b[36mstopWordsRemover\u001b[39m: \u001b[32mStopWordsRemover\u001b[39m = stopWords_fd4638462783\n",
+ "\u001b[36mstemmer\u001b[39m: \u001b[32mStemmer\u001b[39m = stemmer_5019915b3c8c\n",
+ "\u001b[36mhashingTF\u001b[39m: \u001b[32mHashingTF\u001b[39m = hashingTF_6a7c3984d72f\n",
+ "\u001b[36midf\u001b[39m: \u001b[32mIDF\u001b[39m = idf_dd3fc989f4bd\n",
+ "\u001b[36mstringIndexer\u001b[39m: \u001b[32mStringIndexer\u001b[39m = strIdx_e56124ff660c\n",
+ "\u001b[36mgbt\u001b[39m: \u001b[32mGBTClassifier\u001b[39m = gbtc_2d0fbf17ce28"
+ ]
+ },
+ "execution_count": 22,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val regexTokenizer = new RegexTokenizer()\n",
+ " .setInputCol(\"text\")\n",
+ " .setOutputCol(\"words\")\n",
+ " .setPattern(\"\\\\W\")\n",
+ "\n",
+ "val stopWordsRemover = new StopWordsRemover()\n",
+ " .setInputCol(\"words\")\n",
+ " .setOutputCol(\"removed\")\n",
+ "\n",
+ "// only Scala 2.11\n",
+ "val stemmer = new Stemmer()\n",
+ " .setInputCol(\"removed\")\n",
+ " .setOutputCol(\"stemmed\")\n",
+ " .setLanguage(\"English\")\n",
+ "\n",
+ "val hashingTF = new HashingTF()\n",
+ " .setInputCol(\"stemmed\")\n",
+ " .setNumFeatures(3000)\n",
+ " .setOutputCol(\"rawFeatures\")\n",
+ "\n",
+ "val idf = new IDF()\n",
+ " .setInputCol(\"rawFeatures\")\n",
+ " .setOutputCol(\"features\")\n",
+ "\n",
+ "val stringIndexer = new StringIndexer()\n",
+ " .setInputCol(\"target\")\n",
+ " .setOutputCol(\"indexedLabel\")\n",
+ "\n",
+ "val gbt = new GBTClassifier()\n",
+ " .setLabelCol(\"indexedLabel\")\n",
+ " .setFeaturesCol(\"features\")\n",
+ " .setPredictionCol(\"predictionTarget\")\n",
+ " .setMaxIter(30)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mpipeline\u001b[39m: \u001b[32mPipeline\u001b[39m = pipeline_fe9ff24e7f5b"
+ ]
+ },
+ "execution_count": 23,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val pipeline = new Pipeline()\n",
+ " .setStages(\n",
+ " Array(\n",
+ " regexTokenizer, stopWordsRemover, stemmer, hashingTF, idf, stringIndexer, gbt\n",
+ " )\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mcleanTestData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 2 more fields]"
+ ]
+ },
+ "execution_count": 24,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "var cleanTestData = testData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n",
+ "cleanTestData = cleanTestData.withColumn(\"text\", lower(col(\"text\")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " treeAggregate at IDF.scala:54\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " countByValue at StringIndexer.scala:140\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " countByValue at StringIndexer.scala:140\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at GBTClassifier.scala:183\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at DecisionTreeMetadata.scala:112\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " count at DecisionTreeMetadata.scala:118\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " flatMap at RandomForest.scala:919\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:927\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " mapPartitions at RandomForest.scala:538\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collectAsMap at RandomForest.scala:567\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mpipelineModel\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mml\u001b[39m.\u001b[32mPipelineModel\u001b[39m = pipeline_fe9ff24e7f5b\n",
+ "\u001b[36mfullPredictions\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 10 more fields]"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val pipelineModel = pipeline.fit(cleanTrainData)\n",
+ "val fullPredictions = pipelineModel.transform(cleanTestData)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " take at cmd12.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "20/12/12 16:29:11 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n",
+ "20/12/12 16:29:11 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | id | target | \n",
+ "
\n",
+ " | 0 | 0 |
| 2 | 0 |
| 3 | 1 |
| 9 | 0 |
| 11 | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " save at cmd25.sc:13\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types.IntegerType\n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mresult\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: int]"
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql.types.IntegerType\n",
+ "\n",
+ "var result = fullPredictions\n",
+ " .select(\"id\", \"predictionTarget\")\n",
+ " .withColumn(\"target\", fullPredictions(\"predictionTarget\").cast(IntegerType))\n",
+ " .drop(\"predictionTarget\")\n",
+ "\n",
+ "result.showHTML(5)\n",
+ "\n",
+ "result.write\n",
+ " .format(\"com.databricks.spark.csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .save(\"result.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " csv at cmd26.sc:6\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd26.sc:8\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd26.sc:8\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | summary | id | target | \n",
+ "
\n",
+ " | count | 3261 | 3261 |
| mean | 5425.076050291322 | 0.2946948788715118 |
| stddev | 3146.111609080526 | 0.45597539938904524 |
| min | 0 | 0 |
| max | 9999 | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mmySubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val mySubmission = spark.sqlContext.read\n",
+ " .format(\"csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .option(\"mode\", \"DROPMALFORMED\")\n",
+ " .option(\"escape\", \"\\\"\")\n",
+ " .csv(\"result.csv/*.csv\")\n",
+ "\n",
+ "mySubmission.describe().showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " csv at cmd27.sc:6\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd27.sc:9\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd27.sc:9\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | summary | id | \n",
+ "
\n",
+ " | count | 3263 |
| mean | 5427.15292675452 |
| stddev | 3146.4272214965617 |
| min | 0 |
| max | 9999 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36msampleSubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string]"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val sampleSubmission = spark.sqlContext.read\n",
+ " .format(\"csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .option(\"mode\", \"DROPMALFORMED\")\n",
+ " .option(\"escape\", \"\\\"\")\n",
+ " .csv(\"sample_submission.csv\")\n",
+ " .select(\"id\")\n",
+ "\n",
+ "sampleSubmission.describe().showHTML(5)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " run at ThreadPoolExecutor.java:1149\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd28.sc:16\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " describe at cmd28.sc:16\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " \n",
+ " \n",
+ " | summary | id | target | \n",
+ "
\n",
+ " | count | 3263 | 3263 |
| mean | 5427.15292675452 | 0.2945142506895495 |
| stddev | 3146.4272214965617 | 0.45589395982167097 |
| min | 0 | 0 |
| max | 9999 | 1 |
\n",
+ "
"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mdf_result\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, target: string]\n",
+ "\u001b[36mdf_sampleSubmission\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string]\n",
+ "\u001b[36mjoinedDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string ... 1 more field]\n",
+ "\u001b[36mresultDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]"
+ ]
+ },
+ "execution_count": 29,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val df_result = mySubmission.as(\"dfresult\")\n",
+ "val df_sampleSubmission = sampleSubmission.as(\"dfsamplesubmission\")\n",
+ "\n",
+ "\n",
+ "val joinedDF = df_result\n",
+ " .join(df_sampleSubmission, \n",
+ " col(\"dfsamplesubmission.id\") === col(\"dfresult.id\"),\n",
+ " \"right\") \n",
+ "\n",
+ "var resultDF = joinedDF\n",
+ " .select(col(\"dfsamplesubmission.id\"),\n",
+ " when(col(\"dfresult.id\").isNull, lit(0))\n",
+ " .otherwise(col(\"target\"))\n",
+ " .as(\"target\"))\n",
+ "\n",
+ "resultDF.describe().showHTML()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " run at ThreadPoolExecutor.java:1149\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " save at cmd29.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " save at cmd29.sc:4\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "resultDF.repartition(1).write\n",
+ " .format(\"com.databricks.spark.csv\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .save(sys.env(\"HOME\") + \"/Documents/disasterTweets/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pipelineModel.write\n",
+ " .overwrite\n",
+ " .save(sys.env(\"HOME\") + \"/Documents/technopolis/bigData2020/hw2/boriskin/model/\")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Scala",
+ "language": "scala",
+ "name": "scala"
+ },
+ "language_info": {
+ "codemirror_mode": "text/x-scala",
+ "file_extension": ".scala",
+ "mimetype": "text/x-scala",
+ "name": "scala",
+ "nbconvert_exporter": "script",
+ "version": "2.11.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
diff --git a/hw2/boriskin/.gitignore b/hw2/boriskin/.gitignore
new file mode 100644
index 0000000..12fae23
--- /dev/null
+++ b/hw2/boriskin/.gitignore
@@ -0,0 +1 @@
+model
diff --git a/hw2/boriskin/hw2.ipynb b/hw2/boriskin/hw2.ipynb
new file mode 100644
index 0000000..e053efa
--- /dev/null
+++ b/hw2/boriskin/hw2.ipynb
@@ -0,0 +1,1302 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`org.apache.spark::spark-sql:2.4.0`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql._"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.log4j.{Level, Logger}\n",
+ "Logger.getLogger(\"org\").setLevel(Level.OFF)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Loading spark-stubs\n",
+ "Creating SparkSession\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "Spark UI"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`sh.almond::almond-spark:0.6.0`\n",
+ "\n",
+ "val spark = {\n",
+ " NotebookSparkSession.builder()\n",
+ " .appName(\"Spark Structured Streaming\")\n",
+ " .master(\"local[*]\")\n",
+ " .getOrCreate()\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n",
+ "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd7.sc:3"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def sc = spark.sparkContext\n",
+ "\n",
+ "val rdd = sc.parallelize(1 to 100000000, 100)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n",
+ "\n",
+ "\u001b[39m"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import spark.implicits._\n",
+ "\n",
+ "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36msocketDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [value: string]"
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val socketDF = spark.readStream\n",
+ " .format(\"socket\")\n",
+ " .option(\"host\", \"192.168.1.111\")\n",
+ " .option(\"port\", 8065)\n",
+ " .load()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mres10\u001b[39m: \u001b[32mBoolean\u001b[39m = true"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "socketDF.isStreaming "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "root\n",
+ " |-- value: string (nullable = true)\n",
+ "\n"
+ ]
+ }
+ ],
+ "source": [
+ "socketDF.printSchema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mres12_1\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineModel\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.Pipeline\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineStage\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.classification.GBTClassifier\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.HashingTF\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.IDF\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.RegexTokenizer\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StopWordsRemover\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StringIndexer\n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mres12_13\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.mllib.feature.Stemmer\u001b[39m"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import $ivy.`org.apache.spark::spark-mllib:2.4.3`\n",
+ "\n",
+ "NotebookSparkSession.sync()\n",
+ "\n",
+ "import org.apache.spark.ml.PipelineModel\n",
+ "import org.apache.spark.sql.functions._\n",
+ "\n",
+ "import org.apache.spark.ml.Pipeline\n",
+ "import org.apache.spark.ml.PipelineStage\n",
+ "import org.apache.spark.ml.classification.GBTClassifier\n",
+ "import org.apache.spark.ml.feature.HashingTF\n",
+ "import org.apache.spark.ml.feature.IDF\n",
+ "import org.apache.spark.ml.feature.RegexTokenizer\n",
+ "import org.apache.spark.ml.feature.StopWordsRemover\n",
+ "import org.apache.spark.ml.feature.StringIndexer\n",
+ "\n",
+ "import $ivy.`com.github.master:spark-stemming_2.10:0.2.1`\n",
+ "\n",
+ "NotebookSparkSession.sync()\n",
+ "\n",
+ "import org.apache.spark.mllib.feature.Stemmer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " parquet at IDF.scala:180\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " head at IDF.scala:183\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " parquet at StringIndexer.scala:313\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " head at StringIndexer.scala:315\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " first at ReadWrite.scala:615\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " parquet at treeModels.scala:436\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " sortByKey at treeModels.scala:442\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " map at treeModels.scala:437\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at treeModels.scala:442\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " parquet at treeModels.scala:448\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " map at treeModels.scala:450\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " sortByKey at treeModels.scala:454\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " map at treeModels.scala:450\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " collect at treeModels.scala:454\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[36mmodel\u001b[39m: \u001b[32mPipelineModel\u001b[39m = pipeline_fe9ff24e7f5b"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "val model = PipelineModel.read.load(\"model/\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n",
+ "\u001b[39m\n",
+ "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}\n",
+ "\n",
+ "\u001b[39m\n",
+ "\u001b[36mschema\u001b[39m: \u001b[32mStructType\u001b[39m = \u001b[33mList\u001b[39m(\n",
+ " \u001b[33mStructField\u001b[39m(\u001b[32m\"id\"\u001b[39m, StringType, true, {}),\n",
+ " \u001b[33mStructField\u001b[39m(\u001b[32m\"text\"\u001b[39m, StringType, true, {})\n",
+ ")\n",
+ "\u001b[36mdfJSON\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, text: string]\n",
+ "\u001b[36mclean_data\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, text: string]"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import org.apache.spark.sql.functions._\n",
+ "import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}\n",
+ "\n",
+ "val schema = new StructType()\n",
+ " .add(\"id\", StringType, true)\n",
+ " .add(\"text\", StringType, true)\n",
+ "\n",
+ "val dfJSON = socketDF.withColumn(\"jsonData\",from_json(col(\"value\"),schema))\n",
+ " .select(\"jsonData.*\")\n",
+ "\n",
+ "var clean_data = dfJSON.select(\"id\", \"text\")\n",
+ "clean_data = clean_data.na.fill(0)\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n",
+ "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n",
+ "clean_data = clean_data.withColumn(\"text\", lower(col(\"text\")))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "20/12/12 20:50:53 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n",
+ "20/12/12 20:50:53 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 8 / 8\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ " start at cmd16.sc:10\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "
\n",
+ " 1 / 1\n",
+ "
\n",
+ "
\n",
+ "
\n"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "ename": "",
+ "evalue": "",
+ "output_type": "error",
+ "traceback": [
+ "Interrupted!\n sun.misc.Unsafe.park(\u001b[32mNative Method\u001b[39m)\n java.util.concurrent.locks.LockSupport.park(\u001b[32mLockSupport.java\u001b[39m:\u001b[32m175\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m836\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m997\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m1304\u001b[39m)\n java.util.concurrent.CountDownLatch.await(\u001b[32mCountDownLatch.java\u001b[39m:\u001b[32m231\u001b[39m)\n org.apache.spark.sql.execution.streaming.StreamExecution.awaitTermination(\u001b[32mStreamExecution.scala\u001b[39m:\u001b[32m467\u001b[39m)\n org.apache.spark.sql.execution.streaming.StreamingQueryWrapper.awaitTermination(\u001b[32mStreamingQueryWrapper.scala\u001b[39m:\u001b[32m53\u001b[39m)\n ammonite.$sess.cmd16$Helper.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m11\u001b[39m)\n ammonite.$sess.cmd16$.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m7\u001b[39m)\n ammonite.$sess.cmd16$.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m-1\u001b[39m)"
+ ]
+ }
+ ],
+ "source": [
+ "model.transform(clean_data).select(\"id\", \"predictionTarget\")\n",
+ " .withColumn(\"target\", col(\"predictionTarget\").cast(IntegerType))\n",
+ " .drop(\"predictionTarget\")\n",
+ " .repartition(1).writeStream\n",
+ " .outputMode(\"append\")\n",
+ " .format(\"com.databricks.spark.csv\")\n",
+ " .option(\"path\", \"path/\")\n",
+ " .option(\"checkpointLocation\", \"checkpointLocation/\")\n",
+ " .option(\"header\", \"true\")\n",
+ " .start()\n",
+ " .awaitTermination()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Scala",
+ "language": "scala",
+ "name": "scala"
+ },
+ "language_info": {
+ "codemirror_mode": "text/x-scala",
+ "file_extension": ".scala",
+ "mimetype": "text/x-scala",
+ "name": "scala",
+ "nbconvert_exporter": "script",
+ "version": "2.11.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}