diff --git a/hw0/boriskin/hw0.ipynb b/hw0/boriskin/hw0.ipynb new file mode 100644 index 0000000..e42cba7 --- /dev/null +++ b/hw0/boriskin/hw0.ipynb @@ -0,0 +1,1760 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-sql:2.4.3`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql._" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n", + "\u001b[39m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.log4j.{Level, Logger}\n", + "Logger.getLogger(\"org\").setLevel(Level.OFF)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading spark-stubs\n", + "Creating SparkSession\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n" + ] + }, + { + "data": { + "text/html": [ + "Spark UI" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@6731a7cf" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val spark = {\n", + " NotebookSparkSession.builder()\n", + " .master(\"local[*]\")\n", + " .getOrCreate()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def sc = spark.sparkContext" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd5.sc:1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val rdd = sc.parallelize(1 to 100000000, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spark.implicits._\n", + "\n", + "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " csv at cmd7.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
data: DataFrame = [id: string, name: string ... 14 more fields]
\n", + "
" + ], + "text/plain": [ + "\u001b[36mdata\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, name: string ... 14 more fields]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "var data = spark.read.option(\"header\", \"true\").option(\"mode\", \"DROPMALFORMED\").option(\"escape\", \"\\\"\").csv(\"AB_NYC_2019.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "implicit class RichDF(val df: DataFrame) {\n", + " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n", + " import xml.Utility.escape\n", + " val data = df.take(limit)\n", + " val header = df.schema.fieldNames.toSeq\n", + " val rows: Seq[Seq[String]] = data.map { row =>\n", + " row.toSeq.map { cell =>\n", + " val str = cell match {\n", + " case null => \"null\"\n", + " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n", + " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n", + " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n", + " case _ => cell.toString\n", + " }\n", + " if (truncate > 0 && str.length > truncate) {\n", + " // do not show ellipses for strings shorter than 4 characters.\n", + " if (truncate < 4) str.substring(0, truncate)\n", + " else str.substring(0, truncate - 3) + \"...\"\n", + " } else {\n", + " str\n", + " }\n", + " }: Seq[String]\n", + " }\n", + "\n", + " publish.html(s\"\"\"\n", + " \n", + " \n", + " ${header.map(h => s\"\").mkString}\n", + " \n", + " ${rows.map { row =>\n", + " s\"${row.map { c => s\"\" }.mkString}\"\n", + " }.mkString\n", + " }\n", + "
${escape(h)}
${escape(c)}
\"\"\")\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
2539Clean & quiet apt...2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
2595Skylit Midtown Ca...2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
3647THE VILLAGE OF HA...4632ElisabethManhattanHarlem40.80902-73.9419Private room15030nullnull1365
3831Cozy Entire Floor...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
5022Entire Apt: Spaci...7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
5099Large Cozy 1 BR A...7322ChrisManhattanMurray Hill40.74767-73.975Entire home/apt2003742019-06-220.591129
5121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6045492017-10-050.4010
5178Large Furnished R...8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room7924302019-06-243.471220
5203Cozy Clean Guest ...7490MaryEllenManhattanUpper West Side40.80178-73.96723Private room7921182017-07-210.9910
5238Cute & Cozy Lower...7549BenManhattanChinatown40.71344-73.99037Entire home/apt15011602019-06-091.334188
5295Beautiful 1br on ...7702LenaManhattanUpper West Side40.80316-73.96545Entire home/apt1355532019-06-220.4316
5441Central Manhattan...7989KateManhattanHell's Kitchen40.76076-73.98867Private room8521882019-06-231.50139
5803Lovely Room 1, Ga...9744LaurieBrooklynSouth Slope40.66829-73.98779Private room8941672019-06-241.343314
6021Wonderful Guest B...11528ClaudioManhattanUpper West Side40.79826-73.96113Private room8521132019-07-050.911333
6090West Village Nest...11975AlinaManhattanWest Village40.7353-74.00525Entire home/apt12090272018-10-310.2210
6848Only 2 stops to M...15991Allen & IrinaBrooklynWilliamsburg40.70837-73.95352Entire home/apt14021482019-06-291.20146
7097Perfect for Your ...17571JaneBrooklynFort Greene40.69169-73.97185Entire home/apt21521982019-06-281.721321
7322Chelsea Perfect18946DotiManhattanChelsea40.74192-73.99501Private room14012602019-07-012.12112
7726Hip Historic Brow...20950Adam And CharityBrooklynCrown Heights40.67592-73.94694Entire home/apt993532019-06-224.44121
7750Huge 2 BR Upper E...17985SingManhattanEast Harlem40.79685-73.94872Entire home/apt19070nullnull2249
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data.toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}\u001b[39m" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.expressions.Window\n", + "import org.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
2539Clean & quiet apt...2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
2595Skylit Midtown Ca...2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
3831Cozy Entire Floor...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
5022Entire Apt: Spaci...7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
5099Large Cozy 1 BR A...7322ChrisManhattanMurray Hill40.74767-73.975Entire home/apt2003742019-06-220.591129
5121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6045492017-10-050.4010
5178Large Furnished R...8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room7924302019-06-243.471220
5203Cozy Clean Guest ...7490MaryEllenManhattanUpper West Side40.80178-73.96723Private room7921182017-07-210.9910
5238Cute & Cozy Lower...7549BenManhattanChinatown40.71344-73.99037Entire home/apt15011602019-06-091.334188
5295Beautiful 1br on ...7702LenaManhattanUpper West Side40.80316-73.96545Entire home/apt1355532019-06-220.4316
5441Central Manhattan...7989KateManhattanHell's Kitchen40.76076-73.98867Private room8521882019-06-231.50139
5803Lovely Room 1, Ga...9744LaurieBrooklynSouth Slope40.66829-73.98779Private room8941672019-06-241.343314
6021Wonderful Guest B...11528ClaudioManhattanUpper West Side40.79826-73.96113Private room8521132019-07-050.911333
6090West Village Nest...11975AlinaManhattanWest Village40.7353-74.00525Entire home/apt12090272018-10-310.2210
6848Only 2 stops to M...15991Allen & IrinaBrooklynWilliamsburg40.70837-73.95352Entire home/apt14021482019-06-291.20146
7097Perfect for Your ...17571JaneBrooklynFort Greene40.69169-73.97185Entire home/apt21521982019-06-281.721321
7322Chelsea Perfect18946DotiManhattanChelsea40.74192-73.99501Private room14012602019-07-012.12112
7726Hip Historic Brow...20950Adam And CharityBrooklynCrown Heights40.67592-73.94694Entire home/apt993532019-06-224.44121
7801Sweet and Spaciou...21207ChayaBrooklynWilliamsburg40.71842-73.95718Entire home/apt299392011-12-280.0710
8024CBG CtyBGd HelpsH...22486LiselBrooklynPark Slope40.68069-73.97706Private room13021302019-07-011.096347
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36msqlContext\u001b[39m: \u001b[32mSQLContext\u001b[39m = org.apache.spark.sql.SQLContext@7d933b13" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// Фильтр по ячейкам, где есть Null-значения\n", + "\n", + "val sqlContext = spark.sqlContext\n", + "\n", + "sqlContext.createDataFrame(data.rdd.filter(x=> Range(0, x.length).count(x.isNullAt(_)) < 1 ), data.schema).showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "// Фильтр на предложения, где цена нулевая и где минимальное количество ночей больше 365:\n", + "\n", + "data = data.where(col(\"price\") > 0)\n", + "\n", + "data = data.where(col(\"minimum_nights\") < 366)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Медиана:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeМедиана
511.0
2054.0
547.0
2001.0
2792.0
1382.0
692.0
425.0
Shared room45.0
4251.0
592.0
2502.0
1601.0
852.0
351.0
41031.0
2983.0
1881.0
991.0
1103.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Мода:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeprice
511
2054
547
2001
2792
1382
692
425
Shared room35
4251
592
2502
1601
852
355
41031
1881
2983
991
1103
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Среднее:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeСреднее
511.0
2054.0
547.0
2001.0
2792.0
1382.0
691.7142857142857142
425.0
Shared room70.21991341991342
4251.0
597.8
2502.0
1602.0
852.0
353.0
41031.0
2983.0
1881.0
992.0
1103.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Дисперсия:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typevar_samp(price)
510.0
205NaN
54NaN
200NaN
279NaN
138NaN
690.23809523809523805
42NaN
Shared room10382.929065850383
425NaN
59154.2
2500.0
1602.0
85NaN
358.0
410NaN
298NaN
188NaN
991.3333333333333333
110NaN
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Посчитать медиану, моду и среднее, и дисперсию для каждого room_type\n", + "\n", + "println(\"Медиана:\")\n", + "\n", + "data.groupBy(\"room_type\").agg(callUDF(\"percentile_approx\", col(\"price\"), lit(0.5)).as(\"Медиана\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Мода:\")\n", + "\n", + "data.groupBy(\"room_type\", \"price\").count().withColumn(\"row_number\", row_number().over(Window.partitionBy(\"room_type\").orderBy(desc(\"count\")))).select(\"room_type\", \"price\").where(col(\"row_number\") === 1).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Среднее:\")\n", + "\n", + "data.groupBy(\"room_type\").agg(avg(\"price\").as(\"Среднее\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Дисперсия:\")\n", + "\n", + "data.select(\"room_type\", \"price\").groupBy(\"room_type\").agg(variance(\"price\")).as(\"Дисперсия\").toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Самое дешевое:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
24114389Very Spacious bed...180661875SalimManhattanUpper West Side40.76844-73.98333Private room10122018-04-230.1310
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Самое дорогое:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
313402832br - The Heart o...4382127MattManhattanLower East Side40.7198-73.98566Entire home/apt9999300nullnull1365
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Найти самое дорогое и самое дешевое предложение\n", + "\n", + "println(\"Самое дешевое:\")\n", + "\n", + "data.orderBy(\"price\").showHTML(1)\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Самое дорогое:\")\n", + "\n", + "data.orderBy(desc(\"price\")).showHTML(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Корреляцию между ценой и минимальным количеством ночей:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
corr(price, minimum_nights)
0.049635888865656445
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Корреляцию между ценой и количеством отзывов:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
corr(price, number_of_reviews)
-0.04804569425619054
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Посчитать корреляцию между ценой и минимальным количеством ночей, кол-вом отзывов\n", + "\n", + "println(\"Корреляцию между ценой и минимальным количеством ночей:\")\n", + "\n", + "data.agg(corr(\"price\", \"minimum_nights\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Корреляцию между ценой и количеством отзывов:\")\n", + "\n", + "data.agg(corr(\"price\", \"number_of_reviews\")).toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mencodeGeoHash\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mInt\u001b[39m) => \u001b[32mString\u001b[39m = ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// Нужно найти гео квадрат размером 5км на 5км с самой высокой средней стоимостью жилья\n", + "\n", + "val encodeGeoHash = (latitude: Double, longitude: Double, precision: Int) => {\n", + " val base32 = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n", + " var (minLatitude, maxLatitude) = (-90.0, 90.0)\n", + " var (minLongitude, maxLongitude) = (-180.0, 180.0)\n", + " val bits = List(16, 8, 4, 2, 1)\n", + "\n", + " (0 until precision).map { p => {\n", + " base32 apply (0 until 5).map { i => {\n", + " if (((5 * p) + i) % 2 == 0) {\n", + " val mid = (minLongitude + maxLongitude) / 2.0\n", + " if (longitude > mid) {\n", + " minLongitude = mid\n", + " bits(i)\n", + " } else {\n", + " maxLongitude = mid\n", + " 0\n", + " }\n", + " } else {\n", + " val mid = (minLatitude + maxLatitude) / 2.0\n", + " if (latitude > mid) {\n", + " minLatitude = mid\n", + " bits(i)\n", + " } else {\n", + " maxLatitude = mid\n", + " 0\n", + " }\n", + " }\n", + " }\n", + " }.reduceLeft((a, b) => a | b)\n", + " }\n", + " }.mkString(\"\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mgeoHash_udf\u001b[39m: \u001b[32mexpressions\u001b[39m.\u001b[32mUserDefinedFunction\u001b[39m = \u001b[33mUserDefinedFunction\u001b[39m(\n", + " ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344,\n", + " StringType,\n", + " \u001b[33mSome\u001b[39m(\u001b[33mList\u001b[39m(DoubleType, DoubleType, IntegerType))\n", + ")" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val geoHash_udf = udf(encodeGeoHash)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geoHashСредняя цена
dr5wf350.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mgeoHash\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [geoHash: string, Средняя цена: double]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val geoHash = data.withColumn(\"geoHash\", geoHash_udf(col(\"latitude\"), col(\"longitude\"), lit(5))).groupBy(\"geoHash\").agg(avg(\"price\").as(\"Средняя цена\")).orderBy(desc(\"Средняя цена\"))\n", + "\n", + "geoHash.toDF.showHTML(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mobject\u001b[39m \u001b[36mDecoder\u001b[39m" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// check source https://github.com/lifulong/geohash\n", + "\n", + "object Decoder {\n", + " val BASE32\t\t\t = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n", + " val MIN_LATITUDE :Double\t= -90\n", + " val MAX_LATITUDE :Double\t= 90\n", + " val MIN_LONGITUDE :Double\t= -180\n", + " val MAX_LONGITUDE :Double\t= 180\n", + "\n", + " val bits = Array[Byte](16, 8, 4, 2, 1)\n", + " val base32 = BASE32.toCharArray\n", + "\n", + " implicit def int2byte(int: Int) = {\n", + " int.toByte\n", + " }\n", + "\n", + " def DecodeBounds(geohash: String): (Double, Double, Double, Double) = {\n", + "\n", + " def toBitList(str: String) = str.flatMap {\n", + " char => (\"00000\" + base32.indexOf(char).toBinaryString ).\n", + " reverse.take(5).reverse.map('1' == ) } toList\n", + "\n", + " def split(list: List[Boolean]): (List[Boolean], List[Boolean]) = {\n", + "\n", + " list match{\n", + " case Nil => (Nil,Nil)\n", + " case x::Nil => (x::Nil,Nil)\n", + " case x::y::zs => val (xs,ys) = split(zs); (x::xs,y::ys)\n", + " }\n", + " }\n", + "\n", + " def dehash(xs: List[Boolean], min: Double, max: Double): (Double,Double) = {\n", + "\n", + " ((min,max) /: xs ) {\n", + " case ((min,max), bool) =>\n", + " if(bool) ((min + max)/2, max)\n", + " else (min, (min + max)/2)\n", + " }\n", + " }\n", + "\n", + " val (xs, ys) = split(toBitList(geohash))\n", + " val (minLat, maxLat) = dehash(ys, MIN_LATITUDE, MAX_LATITUDE)\n", + " val (minLng, maxLng) = dehash(xs, MIN_LONGITUDE, MAX_LONGITUDE)\n", + "\n", + " (minLat, maxLat, minLng, maxLng)\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 61 / 61\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mcoordinates\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m) = (\n", + " \u001b[32m40.5615234375\u001b[39m,\n", + " \u001b[32m40.60546875\u001b[39m,\n", + " \u001b[32m-73.740234375\u001b[39m,\n", + " \u001b[32m-73.6962890625\u001b[39m\n", + ")" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val coordinates = Decoder.DecodeBounds(geoHash.select(\"geoHash\").collect().map(_(0)).head.toString())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hw1/boriskin/hw1.ipynb b/hw1/boriskin/hw1.ipynb new file mode 100644 index 0000000..3e91be1 --- /dev/null +++ b/hw1/boriskin/hw1.ipynb @@ -0,0 +1,12289 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-sql:2.4.0`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql._" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n", + "\u001b[39m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.log4j.{Level, Logger}\n", + "Logger.getLogger(\"org\").setLevel(Level.OFF)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading spark-stubs\n", + "Creating SparkSession\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n" + ] + }, + { + "data": { + "text/html": [ + "Spark UI" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@4b6889db" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`sh.almond::almond-spark:0.6.0`\n", + "\n", + "val spark = {\n", + " NotebookSparkSession.builder()\n", + " .master(\"local[*]\")\n", + " .getOrCreate()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def sc = spark.sparkContext" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd11.sc:1" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val rdd = sc.parallelize(1 to 100000000, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spark.implicits._\n", + "\n", + "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " load at cmd13.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " load at cmd13.sc:15\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mtrainData\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, keyword: string ... 3 more fields]\n", + "\u001b[36mtestData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 2 more fields]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var trainData = spark.sqlContext.read\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .load(\"train.csv\")\n", + " .filter($\"text\".isNotNull)\n", + " .filter($\"target\".isNotNull)\n", + "\n", + "var testData = spark.sqlContext.read\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .load(\"test.csv\")\n", + " .filter($\"text\".isNotNull)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mres14_0\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m, \u001b[32m\"target\"\u001b[39m)\n", + "\u001b[36mres14_1\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m)" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainData.columns\n", + "testData.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "implicit class RichDF(val df: DataFrame) {\n", + " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n", + " import xml.Utility.escape\n", + " val data = df.take(limit)\n", + " val header = df.schema.fieldNames.toSeq\n", + " val rows: Seq[Seq[String]] = data.map { row =>\n", + " row.toSeq.map { cell =>\n", + " val str = cell match {\n", + " case null => \"null\"\n", + " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n", + " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n", + " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n", + " case _ => cell.toString\n", + " }\n", + " if (truncate > 0 && str.length > truncate) {\n", + " // do not show ellipses for strings shorter than 4 characters.\n", + " if (truncate < 4) str.substring(0, truncate)\n", + " else str.substring(0, truncate - 3) + \"...\"\n", + " } else {\n", + " str\n", + " }\n", + " }: Seq[String]\n", + " }\n", + "\n", + " publish.html(s\"\"\"\n", + " \n", + " \n", + " ${header.map(h => s\"\").mkString}\n", + " \n", + " ${rows.map { row =>\n", + " s\"${row.map { c => s\"\" }.mkString}\"\n", + " }.mkString\n", + " }\n", + "
${escape(h)}
${escape(c)}
\"\"\")\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd15.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtexttarget
1nullnullOur Deeds are the...1
4nullnullForest fire near ...1
5nullnullAll residents ask...1
6nullnull13,000 people rec...1
7nullnullJust got sent thi...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainData.showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd15.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtext
0nullnullJust happened a t...
2nullnullHeard about #eart...
3nullnullthere is a forest...
9nullnullApocalypse lighti...
11nullnullTyphoon Soudelor ...
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "testData.showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd18.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd18.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidkeywordlocationtexttarget
count71767115477171767176
mean5434.829152731327null8412.318181818182null0.42934782608695654
stddev3142.3697249082106null24663.97600393154null0.49501759344035795
min1ablaze ! Residents Retur...0
max9998wreckedåø\\_(?)_/åøåÈMGN-AFRICAå¨ pi...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainData.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd19.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd19.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidkeywordlocationtext
count3261323521563261
mean5425.076050291322null98593.33333333333null
stddev3146.111609080526null225255.45173927903null
min0ablaze ! Sex-themed e-bo...
max9999wreckedå©hicago‰Û÷Sexist‰Ûª peer...
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "testData.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd21.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd21.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd21.sc:11\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd21.sc:11\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36mvegas._\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36mvegas.data.External._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// only Scala 2.11\n", + "import $ivy.`org.vegas-viz::vegas:0.3.11`\n", + "\n", + "import vegas._\n", + "import vegas.data.External._\n", + "\n", + "Vegas(\"There are more tweets with target 0 (no disaster) than target 1 (disaster tweets)\").\n", + " withData(Seq(\n", + " Map(\"value\" -> \"0\", \"count\" ->\n", + " trainData.filter(\"target = 0\").describe().select(\"target\").first.getString(0).toInt),\n", + " Map(\"value\" -> \"1\", \"count\" ->\n", + " trainData.filter(\"target = 1\").describe().select(\"target\").first.getString(0).toInt)\n", + " )).\n", + " encodeX(\"value\", Ordinal).\n", + " encodeY(\"count\", Quantitative).\n", + " mark(Bar).\n", + " show" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd15.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtexttarget
1nullnullour deeds are the...1
4nullnullforest fire near ...1
5nullnullall residents ask...1
6nullnull13 000 people rec...1
7nullnulljust got sent thi...1
8nullnull rockyfire update...1
10nullnull flood disaster ...1
13nullnulli am on top of th...1
14nullnullthere is an emerg...1
15nullnulli am afraid that ...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n", + "\n", + "\u001b[39m\n", + "\u001b[36mcleanTrainData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 3 more fields]" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.functions._\n", + "\n", + "var cleanTrainData = trainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", lower(col(\"text\")))\n", + "\n", + "cleanTrainData.showHTML(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.Pipeline\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineStage\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.classification.GBTClassifier\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.HashingTF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.IDF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.RegexTokenizer\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StopWordsRemover\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StringIndexer\u001b[39m" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-mllib:2.4.3`\n", + "\n", + "import org.apache.spark.ml.Pipeline\n", + "import org.apache.spark.ml.PipelineStage\n", + "import org.apache.spark.ml.classification.GBTClassifier\n", + "import org.apache.spark.ml.feature.HashingTF\n", + "import org.apache.spark.ml.feature.IDF\n", + "import org.apache.spark.ml.feature.RegexTokenizer\n", + "import org.apache.spark.ml.feature.StopWordsRemover\n", + "import org.apache.spark.ml.feature.StringIndexer" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.mllib.feature.Stemmer\u001b[39m" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`com.github.master:spark-stemming_2.10:0.2.1`\n", + "\n", + "import org.apache.spark.mllib.feature.Stemmer" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mregexTokenizer\u001b[39m: \u001b[32mRegexTokenizer\u001b[39m = regexTok_4401742f2c50\n", + "\u001b[36mstopWordsRemover\u001b[39m: \u001b[32mStopWordsRemover\u001b[39m = stopWords_fd7831553d9a\n", + "\u001b[36mstemmer\u001b[39m: \u001b[32mStemmer\u001b[39m = stemmer_4dd04471ed37\n", + "\u001b[36mhashingTF\u001b[39m: \u001b[32mHashingTF\u001b[39m = hashingTF_348821016f8a\n", + "\u001b[36midf\u001b[39m: \u001b[32mIDF\u001b[39m = idf_abcf437516ac\n", + "\u001b[36mstringIndexer\u001b[39m: \u001b[32mStringIndexer\u001b[39m = strIdx_c6bf2c7d6324\n", + "\u001b[36mgbt\u001b[39m: \u001b[32mGBTClassifier\u001b[39m = gbtc_0ed24fb6da48" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val regexTokenizer = new RegexTokenizer()\n", + " .setInputCol(\"text\")\n", + " .setOutputCol(\"words\")\n", + " .setPattern(\"\\\\W\")\n", + "\n", + "val stopWordsRemover = new StopWordsRemover()\n", + " .setInputCol(\"words\")\n", + " .setOutputCol(\"removed\")\n", + "\n", + "// only Scala 2.11\n", + "val stemmer = new Stemmer()\n", + " .setInputCol(\"removed\")\n", + " .setOutputCol(\"stemmed\")\n", + " .setLanguage(\"English\")\n", + "\n", + "val hashingTF = new HashingTF()\n", + " .setInputCol(\"stemmed\")\n", + " .setNumFeatures(3000)\n", + " .setOutputCol(\"rawFeatures\")\n", + "\n", + "val idf = new IDF()\n", + " .setInputCol(\"rawFeatures\")\n", + " .setOutputCol(\"features\")\n", + "\n", + "val stringIndexer = new StringIndexer()\n", + " .setInputCol(\"target\")\n", + " .setOutputCol(\"indexedLabel\")\n", + "\n", + "val gbt = new GBTClassifier()\n", + " .setLabelCol(\"indexedLabel\")\n", + " .setFeaturesCol(\"features\")\n", + " .setPredictionCol(\"predictionTarget\")\n", + " .setMaxIter(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mpipeline\u001b[39m: \u001b[32mPipeline\u001b[39m = pipeline_3d4163ae470f" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val pipeline = new Pipeline()\n", + " .setStages(\n", + " Array(\n", + " regexTokenizer, stopWordsRemover, stemmer, hashingTF, idf, stringIndexer, gbt\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mcleanTestData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 2 more fields]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var cleanTestData = testData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", lower(col(\"text\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " treeAggregate at IDF.scala:54\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " countByValue at StringIndexer.scala:140\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " countByValue at StringIndexer.scala:140\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at GBTClassifier.scala:183\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mpipelineModel\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mml\u001b[39m.\u001b[32mPipelineModel\u001b[39m = pipeline_3d4163ae470f\n", + "\u001b[36mfullPredictions\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 10 more fields]" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val pipelineModel = pipeline.fit(cleanTrainData)\n", + "val fullPredictions = pipelineModel.transform(cleanTestData)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd15.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "20/11/22 17:31:46 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n", + "20/11/22 17:31:46 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtarget
00
20
31
90
111
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd31.sc:15\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types.IntegerType\n", + "\n", + "\u001b[39m\n", + "\u001b[36mresult\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: int]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.types.IntegerType\n", + "\n", + "var result = fullPredictions\n", + " .select(\"id\", \"predictionTarget\")\n", + " .withColumn(\"target\", fullPredictions(\"predictionTarget\").cast(IntegerType))\n", + " .drop(\"predictionTarget\")\n", + "\n", + "result.showHTML(5)\n", + "\n", + "result.write\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .save(\"result.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " csv at cmd32.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd32.sc:8\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd32.sc:8\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidtarget
count32613261
mean5425.0760502913220.2946948788715118
stddev3146.1116090805260.45597539938904524
min00
max99991
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mmySubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val mySubmission = spark.sqlContext.read\n", + " .format(\"csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .csv(\"result.csv/*.csv\")\n", + "\n", + "mySubmission.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " csv at cmd33.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd33.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd33.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryid
count3263
mean5427.15292675452
stddev3146.4272214965617
min0
max9999
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36msampleSubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val sampleSubmission = spark.sqlContext.read\n", + " .format(\"csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .csv(\"sample_submission.csv\")\n", + " .select(\"id\")\n", + "\n", + "sampleSubmission.describe().showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " run at ThreadPoolExecutor.java:1149\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd34.sc:16\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd34.sc:16\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidtarget
count32633263
mean5427.152926754520.2945142506895495
stddev3146.42722149656170.45589395982167097
min00
max99991
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mdf_result\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, target: string]\n", + "\u001b[36mdf_sampleSubmission\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string]\n", + "\u001b[36mjoinedDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string ... 1 more field]\n", + "\u001b[36mresultDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val df_result = mySubmission.as(\"dfresult\")\n", + "val df_sampleSubmission = sampleSubmission.as(\"dfsamplesubmission\")\n", + "\n", + "\n", + "val joinedDF = df_result\n", + " .join(df_sampleSubmission, \n", + " col(\"dfsamplesubmission.id\") === col(\"dfresult.id\"),\n", + " \"right\") \n", + "\n", + "var resultDF = joinedDF\n", + " .select(col(\"dfsamplesubmission.id\"),\n", + " when(col(\"dfresult.id\").isNull, lit(0))\n", + " .otherwise(col(\"target\"))\n", + " .as(\"target\"))\n", + "\n", + "resultDF.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " run at ThreadPoolExecutor.java:1149\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd35.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd35.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "resultDF.repartition(1).write\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .save(sys.env(\"HOME\") + \"/Documents/disasterTweets/\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".scala", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}