diff --git a/hw0/boriskin/hw0.ipynb b/hw0/boriskin/hw0.ipynb new file mode 100644 index 0000000..e42cba7 --- /dev/null +++ b/hw0/boriskin/hw0.ipynb @@ -0,0 +1,1760 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m" + ] + }, + "execution_count": 1, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-sql:2.4.3`" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql._" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n", + "\u001b[39m" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.log4j.{Level, Logger}\n", + "Logger.getLogger(\"org\").setLevel(Level.OFF)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading spark-stubs\n", + "Creating SparkSession\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n" + ] + }, + { + "data": { + "text/html": [ + "Spark UI" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@6731a7cf" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val spark = {\n", + " NotebookSparkSession.builder()\n", + " .master(\"local[*]\")\n", + " .getOrCreate()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def sc = spark.sparkContext" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd5.sc:1" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val rdd = sc.parallelize(1 to 100000000, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spark.implicits._\n", + "\n", + "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " csv at cmd7.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
data: DataFrame = [id: string, name: string ... 14 more fields]
\n", + "
" + ], + "text/plain": [ + "\u001b[36mdata\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, name: string ... 14 more fields]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "var data = spark.read.option(\"header\", \"true\").option(\"mode\", \"DROPMALFORMED\").option(\"escape\", \"\\\"\").csv(\"AB_NYC_2019.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "implicit class RichDF(val df: DataFrame) {\n", + " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n", + " import xml.Utility.escape\n", + " val data = df.take(limit)\n", + " val header = df.schema.fieldNames.toSeq\n", + " val rows: Seq[Seq[String]] = data.map { row =>\n", + " row.toSeq.map { cell =>\n", + " val str = cell match {\n", + " case null => \"null\"\n", + " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n", + " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n", + " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n", + " case _ => cell.toString\n", + " }\n", + " if (truncate > 0 && str.length > truncate) {\n", + " // do not show ellipses for strings shorter than 4 characters.\n", + " if (truncate < 4) str.substring(0, truncate)\n", + " else str.substring(0, truncate - 3) + \"...\"\n", + " } else {\n", + " str\n", + " }\n", + " }: Seq[String]\n", + " }\n", + "\n", + " publish.html(s\"\"\"\n", + " \n", + " \n", + " ${header.map(h => s\"\").mkString}\n", + " \n", + " ${rows.map { row =>\n", + " s\"${row.map { c => s\"\" }.mkString}\"\n", + " }.mkString\n", + " }\n", + "
${escape(h)}
${escape(c)}
\"\"\")\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": { + "scrolled": false + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
2539Clean & quiet apt...2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
2595Skylit Midtown Ca...2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
3647THE VILLAGE OF HA...4632ElisabethManhattanHarlem40.80902-73.9419Private room15030nullnull1365
3831Cozy Entire Floor...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
5022Entire Apt: Spaci...7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
5099Large Cozy 1 BR A...7322ChrisManhattanMurray Hill40.74767-73.975Entire home/apt2003742019-06-220.591129
5121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6045492017-10-050.4010
5178Large Furnished R...8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room7924302019-06-243.471220
5203Cozy Clean Guest ...7490MaryEllenManhattanUpper West Side40.80178-73.96723Private room7921182017-07-210.9910
5238Cute & Cozy Lower...7549BenManhattanChinatown40.71344-73.99037Entire home/apt15011602019-06-091.334188
5295Beautiful 1br on ...7702LenaManhattanUpper West Side40.80316-73.96545Entire home/apt1355532019-06-220.4316
5441Central Manhattan...7989KateManhattanHell's Kitchen40.76076-73.98867Private room8521882019-06-231.50139
5803Lovely Room 1, Ga...9744LaurieBrooklynSouth Slope40.66829-73.98779Private room8941672019-06-241.343314
6021Wonderful Guest B...11528ClaudioManhattanUpper West Side40.79826-73.96113Private room8521132019-07-050.911333
6090West Village Nest...11975AlinaManhattanWest Village40.7353-74.00525Entire home/apt12090272018-10-310.2210
6848Only 2 stops to M...15991Allen & IrinaBrooklynWilliamsburg40.70837-73.95352Entire home/apt14021482019-06-291.20146
7097Perfect for Your ...17571JaneBrooklynFort Greene40.69169-73.97185Entire home/apt21521982019-06-281.721321
7322Chelsea Perfect18946DotiManhattanChelsea40.74192-73.99501Private room14012602019-07-012.12112
7726Hip Historic Brow...20950Adam And CharityBrooklynCrown Heights40.67592-73.94694Entire home/apt993532019-06-224.44121
7750Huge 2 BR Upper E...17985SingManhattanEast Harlem40.79685-73.94872Entire home/apt19070nullnull2249
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "data.toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.expressions.Window\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}\u001b[39m" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.expressions.Window\n", + "import org.apache.spark.sql.functions.{avg, callUDF, col, corr, desc, lit, row_number, udf, variance}" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
2539Clean & quiet apt...2787JohnBrooklynKensington40.64749-73.97237Private room149192018-10-190.216365
2595Skylit Midtown Ca...2845JenniferManhattanMidtown40.75362-73.98377Entire home/apt2251452019-05-210.382355
3831Cozy Entire Floor...4869LisaRoxanneBrooklynClinton Hill40.68514-73.95976Entire home/apt8912702019-07-054.641194
5022Entire Apt: Spaci...7192LauraManhattanEast Harlem40.79851-73.94399Entire home/apt801092018-11-190.1010
5099Large Cozy 1 BR A...7322ChrisManhattanMurray Hill40.74767-73.975Entire home/apt2003742019-06-220.591129
5121BlissArtsSpace!7356GaronBrooklynBedford-Stuyvesant40.68688-73.95596Private room6045492017-10-050.4010
5178Large Furnished R...8967ShunichiManhattanHell's Kitchen40.76489-73.98493Private room7924302019-06-243.471220
5203Cozy Clean Guest ...7490MaryEllenManhattanUpper West Side40.80178-73.96723Private room7921182017-07-210.9910
5238Cute & Cozy Lower...7549BenManhattanChinatown40.71344-73.99037Entire home/apt15011602019-06-091.334188
5295Beautiful 1br on ...7702LenaManhattanUpper West Side40.80316-73.96545Entire home/apt1355532019-06-220.4316
5441Central Manhattan...7989KateManhattanHell's Kitchen40.76076-73.98867Private room8521882019-06-231.50139
5803Lovely Room 1, Ga...9744LaurieBrooklynSouth Slope40.66829-73.98779Private room8941672019-06-241.343314
6021Wonderful Guest B...11528ClaudioManhattanUpper West Side40.79826-73.96113Private room8521132019-07-050.911333
6090West Village Nest...11975AlinaManhattanWest Village40.7353-74.00525Entire home/apt12090272018-10-310.2210
6848Only 2 stops to M...15991Allen & IrinaBrooklynWilliamsburg40.70837-73.95352Entire home/apt14021482019-06-291.20146
7097Perfect for Your ...17571JaneBrooklynFort Greene40.69169-73.97185Entire home/apt21521982019-06-281.721321
7322Chelsea Perfect18946DotiManhattanChelsea40.74192-73.99501Private room14012602019-07-012.12112
7726Hip Historic Brow...20950Adam And CharityBrooklynCrown Heights40.67592-73.94694Entire home/apt993532019-06-224.44121
7801Sweet and Spaciou...21207ChayaBrooklynWilliamsburg40.71842-73.95718Entire home/apt299392011-12-280.0710
8024CBG CtyBGd HelpsH...22486LiselBrooklynPark Slope40.68069-73.97706Private room13021302019-07-011.096347
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36msqlContext\u001b[39m: \u001b[32mSQLContext\u001b[39m = org.apache.spark.sql.SQLContext@7d933b13" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// Фильтр по ячейкам, где есть Null-значения\n", + "\n", + "val sqlContext = spark.sqlContext\n", + "\n", + "sqlContext.createDataFrame(data.rdd.filter(x=> Range(0, x.length).count(x.isNullAt(_)) < 1 ), data.schema).showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "// Фильтр на предложения, где цена нулевая и где минимальное количество ночей больше 365:\n", + "\n", + "data = data.where(col(\"price\") > 0)\n", + "\n", + "data = data.where(col(\"minimum_nights\") < 366)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Медиана:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeМедиана
511.0
2054.0
547.0
2001.0
2792.0
1382.0
692.0
425.0
Shared room45.0
4251.0
592.0
2502.0
1601.0
852.0
351.0
41031.0
2983.0
1881.0
991.0
1103.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Мода:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeprice
511
2054
547
2001
2792
1382
692
425
Shared room35
4251
592
2502
1601
852
355
41031
1881
2983
991
1103
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Среднее:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typeСреднее
511.0
2054.0
547.0
2001.0
2792.0
1382.0
691.7142857142857142
425.0
Shared room70.21991341991342
4251.0
597.8
2502.0
1602.0
852.0
353.0
41031.0
2983.0
1881.0
992.0
1103.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Дисперсия:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 4 / 4\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 20 / 20\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 25 / 25\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
room_typevar_samp(price)
510.0
205NaN
54NaN
200NaN
279NaN
138NaN
690.23809523809523805
42NaN
Shared room10382.929065850383
425NaN
59154.2
2500.0
1602.0
85NaN
358.0
410NaN
298NaN
188NaN
991.3333333333333333
110NaN
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Посчитать медиану, моду и среднее, и дисперсию для каждого room_type\n", + "\n", + "println(\"Медиана:\")\n", + "\n", + "data.groupBy(\"room_type\").agg(callUDF(\"percentile_approx\", col(\"price\"), lit(0.5)).as(\"Медиана\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Мода:\")\n", + "\n", + "data.groupBy(\"room_type\", \"price\").count().withColumn(\"row_number\", row_number().over(Window.partitionBy(\"room_type\").orderBy(desc(\"count\")))).select(\"room_type\", \"price\").where(col(\"row_number\") === 1).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Среднее:\")\n", + "\n", + "data.groupBy(\"room_type\").agg(avg(\"price\").as(\"Среднее\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Дисперсия:\")\n", + "\n", + "data.select(\"room_type\", \"price\").groupBy(\"room_type\").agg(variance(\"price\")).as(\"Дисперсия\").toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Самое дешевое:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
24114389Very Spacious bed...180661875SalimManhattanUpper West Side40.76844-73.98333Private room10122018-04-230.1310
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Самое дорогое:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idnamehost_idhost_nameneighbourhood_groupneighbourhoodlatitudelongituderoom_typepriceminimum_nightsnumber_of_reviewslast_reviewreviews_per_monthcalculated_host_listings_countavailability_365
313402832br - The Heart o...4382127MattManhattanLower East Side40.7198-73.98566Entire home/apt9999300nullnull1365
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Найти самое дорогое и самое дешевое предложение\n", + "\n", + "println(\"Самое дешевое:\")\n", + "\n", + "data.orderBy(\"price\").showHTML(1)\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Самое дорогое:\")\n", + "\n", + "data.orderBy(desc(\"price\")).showHTML(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Корреляцию между ценой и минимальным количеством ночей:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
corr(price, minimum_nights)
0.049635888865656445
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Корреляцию между ценой и количеством отзывов:\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
corr(price, number_of_reviews)
-0.04804569425619054
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "// Посчитать корреляцию между ценой и минимальным количеством ночей, кол-вом отзывов\n", + "\n", + "println(\"Корреляцию между ценой и минимальным количеством ночей:\")\n", + "\n", + "data.agg(corr(\"price\", \"minimum_nights\")).toDF.showHTML()\n", + "\n", + "println(\"\")\n", + "println(\"\")\n", + "println(\"Корреляцию между ценой и количеством отзывов:\")\n", + "\n", + "data.agg(corr(\"price\", \"number_of_reviews\")).toDF.showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mencodeGeoHash\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mInt\u001b[39m) => \u001b[32mString\u001b[39m = ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// Нужно найти гео квадрат размером 5км на 5км с самой высокой средней стоимостью жилья\n", + "\n", + "val encodeGeoHash = (latitude: Double, longitude: Double, precision: Int) => {\n", + " val base32 = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n", + " var (minLatitude, maxLatitude) = (-90.0, 90.0)\n", + " var (minLongitude, maxLongitude) = (-180.0, 180.0)\n", + " val bits = List(16, 8, 4, 2, 1)\n", + "\n", + " (0 until precision).map { p => {\n", + " base32 apply (0 until 5).map { i => {\n", + " if (((5 * p) + i) % 2 == 0) {\n", + " val mid = (minLongitude + maxLongitude) / 2.0\n", + " if (longitude > mid) {\n", + " minLongitude = mid\n", + " bits(i)\n", + " } else {\n", + " maxLongitude = mid\n", + " 0\n", + " }\n", + " } else {\n", + " val mid = (minLatitude + maxLatitude) / 2.0\n", + " if (latitude > mid) {\n", + " minLatitude = mid\n", + " bits(i)\n", + " } else {\n", + " maxLatitude = mid\n", + " 0\n", + " }\n", + " }\n", + " }\n", + " }.reduceLeft((a, b) => a | b)\n", + " }\n", + " }.mkString(\"\")\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mgeoHash_udf\u001b[39m: \u001b[32mexpressions\u001b[39m.\u001b[32mUserDefinedFunction\u001b[39m = \u001b[33mUserDefinedFunction\u001b[39m(\n", + " ammonite.$sess.cmd16$Helper$$Lambda$5564/213636581@362a7344,\n", + " StringType,\n", + " \u001b[33mSome\u001b[39m(\u001b[33mList\u001b[39m(DoubleType, DoubleType, IntegerType))\n", + ")" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val geoHash_udf = udf(encodeGeoHash)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at cmd8.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
geoHashСредняя цена
dr5wf350.0
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mgeoHash\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [geoHash: string, Средняя цена: double]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val geoHash = data.withColumn(\"geoHash\", geoHash_udf(col(\"latitude\"), col(\"longitude\"), lit(5))).groupBy(\"geoHash\").agg(avg(\"price\").as(\"Средняя цена\")).orderBy(desc(\"Средняя цена\"))\n", + "\n", + "geoHash.toDF.showHTML(1)" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mobject\u001b[39m \u001b[36mDecoder\u001b[39m" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// check source https://github.com/lifulong/geohash\n", + "\n", + "object Decoder {\n", + " val BASE32\t\t\t = \"0123456789bcdefghjkmnpqrstuvwxyz\"\n", + " val MIN_LATITUDE :Double\t= -90\n", + " val MAX_LATITUDE :Double\t= 90\n", + " val MIN_LONGITUDE :Double\t= -180\n", + " val MAX_LONGITUDE :Double\t= 180\n", + "\n", + " val bits = Array[Byte](16, 8, 4, 2, 1)\n", + " val base32 = BASE32.toCharArray\n", + "\n", + " implicit def int2byte(int: Int) = {\n", + " int.toByte\n", + " }\n", + "\n", + " def DecodeBounds(geohash: String): (Double, Double, Double, Double) = {\n", + "\n", + " def toBitList(str: String) = str.flatMap {\n", + " char => (\"00000\" + base32.indexOf(char).toBinaryString ).\n", + " reverse.take(5).reverse.map('1' == ) } toList\n", + "\n", + " def split(list: List[Boolean]): (List[Boolean], List[Boolean]) = {\n", + "\n", + " list match{\n", + " case Nil => (Nil,Nil)\n", + " case x::Nil => (x::Nil,Nil)\n", + " case x::y::zs => val (xs,ys) = split(zs); (x::xs,y::ys)\n", + " }\n", + " }\n", + "\n", + " def dehash(xs: List[Boolean], min: Double, max: Double): (Double,Double) = {\n", + "\n", + " ((min,max) /: xs ) {\n", + " case ((min,max), bool) =>\n", + " if(bool) ((min + max)/2, max)\n", + " else (min, (min + max)/2)\n", + " }\n", + " }\n", + "\n", + " val (xs, ys) = split(toBitList(geohash))\n", + " val (minLat, maxLat) = dehash(ys, MIN_LATITUDE, MAX_LATITUDE)\n", + " val (minLng, maxLng) = dehash(xs, MIN_LONGITUDE, MAX_LONGITUDE)\n", + "\n", + " (minLat, maxLat, minLng, maxLng)\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 2 / 2\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 200 / 200\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at cmd32.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 61 / 61\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mcoordinates\u001b[39m: (\u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m, \u001b[32mDouble\u001b[39m) = (\n", + " \u001b[32m40.5615234375\u001b[39m,\n", + " \u001b[32m40.60546875\u001b[39m,\n", + " \u001b[32m-73.740234375\u001b[39m,\n", + " \u001b[32m-73.6962890625\u001b[39m\n", + ")" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val coordinates = Decoder.DecodeBounds(geoHash.select(\"geoHash\").collect().map(_(0)).head.toString())" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".sc", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.12.11" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hw1/boriskin/hw1.ipynb b/hw1/boriskin/hw1.ipynb new file mode 100644 index 0000000..9375973 --- /dev/null +++ b/hw1/boriskin/hw1.ipynb @@ -0,0 +1,12306 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-sql:2.4.0`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql._" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n", + "\u001b[39m" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.log4j.{Level, Logger}\n", + "Logger.getLogger(\"org\").setLevel(Level.OFF)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading spark-stubs\n", + "Creating SparkSession\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n" + ] + }, + { + "data": { + "text/html": [ + "Spark UI" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@18bdf22f" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`sh.almond::almond-spark:0.6.0`\n", + "\n", + "val spark = {\n", + " NotebookSparkSession.builder()\n", + " .master(\"local[*]\")\n", + " .getOrCreate()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def sc = spark.sparkContext" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd8.sc:1" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val rdd = sc.parallelize(1 to 100000000, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spark.implicits._\n", + "\n", + "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " load at cmd10.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " load at cmd10.sc:15\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mtrainData\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, keyword: string ... 3 more fields]\n", + "\u001b[36mtestData\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, keyword: string ... 2 more fields]" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var trainData = spark.sqlContext.read\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .load(\"train.csv\")\n", + " .filter($\"text\".isNotNull)\n", + " .filter($\"target\".isNotNull)\n", + "\n", + "var testData = spark.sqlContext.read\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .load(\"test.csv\")\n", + " .filter($\"text\".isNotNull)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mres11_0\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m, \u001b[32m\"target\"\u001b[39m)\n", + "\u001b[36mres11_1\u001b[39m: \u001b[32mArray\u001b[39m[\u001b[32mString\u001b[39m] = \u001b[33mArray\u001b[39m(\u001b[32m\"id\"\u001b[39m, \u001b[32m\"keyword\"\u001b[39m, \u001b[32m\"location\"\u001b[39m, \u001b[32m\"text\"\u001b[39m)" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trainData.columns\n", + "testData.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mclass\u001b[39m \u001b[36mRichDF\u001b[39m" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "implicit class RichDF(val df: DataFrame) {\n", + " def showHTML(limit:Int = 20, truncate: Int = 20) = {\n", + " import xml.Utility.escape\n", + " val data = df.take(limit)\n", + " val header = df.schema.fieldNames.toSeq\n", + " val rows: Seq[Seq[String]] = data.map { row =>\n", + " row.toSeq.map { cell =>\n", + " val str = cell match {\n", + " case null => \"null\"\n", + " case binary: Array[Byte] => binary.map(\"%02X\".format(_)).mkString(\"[\", \" \", \"]\")\n", + " case array: Array[_] => array.mkString(\"[\", \", \", \"]\")\n", + " case seq: Seq[_] => seq.mkString(\"[\", \", \", \"]\")\n", + " case _ => cell.toString\n", + " }\n", + " if (truncate > 0 && str.length > truncate) {\n", + " // do not show ellipses for strings shorter than 4 characters.\n", + " if (truncate < 4) str.substring(0, truncate)\n", + " else str.substring(0, truncate - 3) + \"...\"\n", + " } else {\n", + " str\n", + " }\n", + " }: Seq[String]\n", + " }\n", + "\n", + " publish.html(s\"\"\"\n", + " \n", + " \n", + " ${header.map(h => s\"\").mkString}\n", + " \n", + " ${rows.map { row =>\n", + " s\"${row.map { c => s\"\" }.mkString}\"\n", + " }.mkString\n", + " }\n", + "
${escape(h)}
${escape(c)}
\"\"\")\n", + " }\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd12.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtexttarget
1nullnullOur Deeds are the...1
4nullnullForest fire near ...1
5nullnullAll residents ask...1
6nullnull13,000 people rec...1
7nullnullJust got sent thi...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainData.showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd12.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtext
0nullnullJust happened a t...
2nullnullHeard about #eart...
3nullnullthere is a forest...
9nullnullApocalypse lighti...
11nullnullTyphoon Soudelor ...
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "testData.showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd15.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd15.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidkeywordlocationtexttarget
count71767115477171767176
mean5434.829152731327null8412.318181818182null0.42934782608695654
stddev3142.3697249082106null24663.97600393154null0.49501759344035795
min1ablaze ! Residents Retur...0
max9998wreckedåø\\_(?)_/åøåÈMGN-AFRICAå¨ pi...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "trainData.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd16.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd16.sc:1\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidkeywordlocationtext
count3261323521563261
mean5425.076050291322null98593.33333333333null
stddev3146.111609080526null225255.45173927903null
min0ablaze ! Sex-themed e-bo...
max9999wreckedå©hicago‰Û÷Sexist‰Ûª peer...
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "testData.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " describe at cmd17.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd17.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd17.sc:11\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd17.sc:11\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36mvegas._\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36mvegas.data.External._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "// only Scala 2.11\n", + "import $ivy.`org.vegas-viz::vegas:0.3.11`\n", + "\n", + "NotebookSparkSession.sync()\n", + "\n", + "import vegas._\n", + "import vegas.data.External._\n", + "\n", + "Vegas(\"There are more tweets with target 0 (no disaster) than target 1 (disaster tweets)\").\n", + " withData(Seq(\n", + " Map(\"value\" -> \"0\", \"count\" ->\n", + " trainData.filter(\"target = 0\").describe().select(\"target\").first.getString(0).toInt),\n", + " Map(\"value\" -> \"1\", \"count\" ->\n", + " trainData.filter(\"target = 1\").describe().select(\"target\").first.getString(0).toInt)\n", + " )).\n", + " encodeX(\"value\", Ordinal).\n", + " encodeY(\"count\", Quantitative).\n", + " mark(Bar).\n", + " show" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd12.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idkeywordlocationtexttarget
1nullnullour deeds are the...1
4nullnullforest fire near ...1
5nullnullall residents ask...1
6nullnull13 000 people rec...1
7nullnulljust got sent thi...1
8nullnull rockyfire update...1
10nullnull flood disaster ...1
13nullnulli am on top of th...1
14nullnullthere is an emerg...1
15nullnulli am afraid that ...1
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n", + "\n", + "\u001b[39m\n", + "\u001b[36mcleanTrainData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 3 more fields]" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.functions._\n", + "\n", + "var cleanTrainData = trainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n", + "cleanTrainData = cleanTrainData.withColumn(\"text\", lower(col(\"text\")))\n", + "\n", + "cleanTrainData.showHTML(10)" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.Pipeline\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineStage\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.classification.GBTClassifier\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.HashingTF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.IDF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.RegexTokenizer\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StopWordsRemover\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StringIndexer\u001b[39m" + ] + }, + "execution_count": 20, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-mllib:2.4.3`\n", + "\n", + "NotebookSparkSession.sync()\n", + "\n", + "import org.apache.spark.ml.Pipeline\n", + "import org.apache.spark.ml.PipelineStage\n", + "import org.apache.spark.ml.classification.GBTClassifier\n", + "import org.apache.spark.ml.feature.HashingTF\n", + "import org.apache.spark.ml.feature.IDF\n", + "import org.apache.spark.ml.feature.RegexTokenizer\n", + "import org.apache.spark.ml.feature.StopWordsRemover\n", + "import org.apache.spark.ml.feature.StringIndexer" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.mllib.feature.Stemmer\u001b[39m" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`com.github.master:spark-stemming_2.10:0.2.1`\n", + "\n", + "NotebookSparkSession.sync()\n", + "\n", + "import org.apache.spark.mllib.feature.Stemmer" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mregexTokenizer\u001b[39m: \u001b[32mRegexTokenizer\u001b[39m = regexTok_226e407d419f\n", + "\u001b[36mstopWordsRemover\u001b[39m: \u001b[32mStopWordsRemover\u001b[39m = stopWords_fd4638462783\n", + "\u001b[36mstemmer\u001b[39m: \u001b[32mStemmer\u001b[39m = stemmer_5019915b3c8c\n", + "\u001b[36mhashingTF\u001b[39m: \u001b[32mHashingTF\u001b[39m = hashingTF_6a7c3984d72f\n", + "\u001b[36midf\u001b[39m: \u001b[32mIDF\u001b[39m = idf_dd3fc989f4bd\n", + "\u001b[36mstringIndexer\u001b[39m: \u001b[32mStringIndexer\u001b[39m = strIdx_e56124ff660c\n", + "\u001b[36mgbt\u001b[39m: \u001b[32mGBTClassifier\u001b[39m = gbtc_2d0fbf17ce28" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val regexTokenizer = new RegexTokenizer()\n", + " .setInputCol(\"text\")\n", + " .setOutputCol(\"words\")\n", + " .setPattern(\"\\\\W\")\n", + "\n", + "val stopWordsRemover = new StopWordsRemover()\n", + " .setInputCol(\"words\")\n", + " .setOutputCol(\"removed\")\n", + "\n", + "// only Scala 2.11\n", + "val stemmer = new Stemmer()\n", + " .setInputCol(\"removed\")\n", + " .setOutputCol(\"stemmed\")\n", + " .setLanguage(\"English\")\n", + "\n", + "val hashingTF = new HashingTF()\n", + " .setInputCol(\"stemmed\")\n", + " .setNumFeatures(3000)\n", + " .setOutputCol(\"rawFeatures\")\n", + "\n", + "val idf = new IDF()\n", + " .setInputCol(\"rawFeatures\")\n", + " .setOutputCol(\"features\")\n", + "\n", + "val stringIndexer = new StringIndexer()\n", + " .setInputCol(\"target\")\n", + " .setOutputCol(\"indexedLabel\")\n", + "\n", + "val gbt = new GBTClassifier()\n", + " .setLabelCol(\"indexedLabel\")\n", + " .setFeaturesCol(\"features\")\n", + " .setPredictionCol(\"predictionTarget\")\n", + " .setMaxIter(30)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mpipeline\u001b[39m: \u001b[32mPipeline\u001b[39m = pipeline_fe9ff24e7f5b" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val pipeline = new Pipeline()\n", + " .setStages(\n", + " Array(\n", + " regexTokenizer, stopWordsRemover, stemmer, hashingTF, idf, stringIndexer, gbt\n", + " )\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mcleanTestData\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 2 more fields]" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "var cleanTestData = testData.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n", + "cleanTestData = cleanTestData.withColumn(\"text\", lower(col(\"text\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " treeAggregate at IDF.scala:54\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " countByValue at StringIndexer.scala:140\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " countByValue at StringIndexer.scala:140\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at GBTClassifier.scala:183\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " take at DecisionTreeMetadata.scala:112\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " count at DecisionTreeMetadata.scala:118\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " flatMap at RandomForest.scala:919\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:927\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " mapPartitions at RandomForest.scala:538\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collectAsMap at RandomForest.scala:567\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mpipelineModel\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mml\u001b[39m.\u001b[32mPipelineModel\u001b[39m = pipeline_fe9ff24e7f5b\n", + "\u001b[36mfullPredictions\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, keyword: string ... 10 more fields]" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val pipelineModel = pipeline.fit(cleanTrainData)\n", + "val fullPredictions = pipelineModel.transform(cleanTestData)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " take at cmd12.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "20/12/12 16:29:11 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n", + "20/12/12 16:29:11 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n" + ] + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idtarget
00
20
31
90
111
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd25.sc:13\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types.IntegerType\n", + "\n", + "\u001b[39m\n", + "\u001b[36mresult\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: int]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.types.IntegerType\n", + "\n", + "var result = fullPredictions\n", + " .select(\"id\", \"predictionTarget\")\n", + " .withColumn(\"target\", fullPredictions(\"predictionTarget\").cast(IntegerType))\n", + " .drop(\"predictionTarget\")\n", + "\n", + "result.showHTML(5)\n", + "\n", + "result.write\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .save(\"result.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " csv at cmd26.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd26.sc:8\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd26.sc:8\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidtarget
count32613261
mean5425.0760502913220.2946948788715118
stddev3146.1116090805260.45597539938904524
min00
max99991
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mmySubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val mySubmission = spark.sqlContext.read\n", + " .format(\"csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .csv(\"result.csv/*.csv\")\n", + "\n", + "mySubmission.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " csv at cmd27.sc:6\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd27.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd27.sc:9\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryid
count3263
mean5427.15292675452
stddev3146.4272214965617
min0
max9999
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36msampleSubmission\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string]" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val sampleSubmission = spark.sqlContext.read\n", + " .format(\"csv\")\n", + " .option(\"header\", \"true\")\n", + " .option(\"mode\", \"DROPMALFORMED\")\n", + " .option(\"escape\", \"\\\"\")\n", + " .csv(\"sample_submission.csv\")\n", + " .select(\"id\")\n", + "\n", + "sampleSubmission.describe().showHTML(5)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " run at ThreadPoolExecutor.java:1149\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd28.sc:16\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " describe at cmd28.sc:16\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
summaryidtarget
count32633263
mean5427.152926754520.2945142506895495
stddev3146.42722149656170.45589395982167097
min00
max99991
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mdf_result\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string, target: string]\n", + "\u001b[36mdf_sampleSubmission\u001b[39m: \u001b[32mDataset\u001b[39m[\u001b[32mRow\u001b[39m] = [id: string]\n", + "\u001b[36mjoinedDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string ... 1 more field]\n", + "\u001b[36mresultDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, target: string]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val df_result = mySubmission.as(\"dfresult\")\n", + "val df_sampleSubmission = sampleSubmission.as(\"dfsamplesubmission\")\n", + "\n", + "\n", + "val joinedDF = df_result\n", + " .join(df_sampleSubmission, \n", + " col(\"dfsamplesubmission.id\") === col(\"dfresult.id\"),\n", + " \"right\") \n", + "\n", + "var resultDF = joinedDF\n", + " .select(col(\"dfsamplesubmission.id\"),\n", + " when(col(\"dfresult.id\").isNull, lit(0))\n", + " .otherwise(col(\"target\"))\n", + " .as(\"target\"))\n", + "\n", + "resultDF.describe().showHTML()" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " run at ThreadPoolExecutor.java:1149\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd29.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " save at cmd29.sc:4\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "resultDF.repartition(1).write\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"header\", \"true\")\n", + " .save(sys.env(\"HOME\") + \"/Documents/disasterTweets/\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pipelineModel.write\n", + " .overwrite\n", + " .save(sys.env(\"HOME\") + \"/Documents/technopolis/bigData2020/hw2/boriskin/model/\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".scala", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/hw2/boriskin/.gitignore b/hw2/boriskin/.gitignore new file mode 100644 index 0000000..12fae23 --- /dev/null +++ b/hw2/boriskin/.gitignore @@ -0,0 +1 @@ +model diff --git a/hw2/boriskin/hw2.ipynb b/hw2/boriskin/hw2.ipynb new file mode 100644 index 0000000..e053efa --- /dev/null +++ b/hw2/boriskin/hw2.ipynb @@ -0,0 +1,1302 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \u001b[39m" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-sql:2.4.0`" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql._\u001b[39m" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql._" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.log4j.{Level, Logger}\n", + "\u001b[39m" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.log4j.{Level, Logger}\n", + "Logger.getLogger(\"org\").setLevel(Level.OFF)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Loading spark-stubs\n", + "Creating SparkSession\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n" + ] + }, + { + "data": { + "text/html": [ + "Spark UI" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[36mspark\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`sh.almond::almond-spark:0.6.0`\n", + "\n", + "val spark = {\n", + " NotebookSparkSession.builder()\n", + " .appName(\"Spark Structured Streaming\")\n", + " .master(\"local[*]\")\n", + " .getOrCreate()\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "defined \u001b[32mfunction\u001b[39m \u001b[36msc\u001b[39m\n", + "\u001b[36mrdd\u001b[39m: \u001b[32morg\u001b[39m.\u001b[32mapache\u001b[39m.\u001b[32mspark\u001b[39m.\u001b[32mrdd\u001b[39m.\u001b[32mRDD\u001b[39m[\u001b[32mInt\u001b[39m] = ParallelCollectionRDD[0] at parallelize at cmd7.sc:3" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def sc = spark.sparkContext\n", + "\n", + "val rdd = sc.parallelize(1 to 100000000, 100)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36mspark.implicits._\n", + "\n", + "\u001b[39m" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import spark.implicits._\n", + "\n", + "org.apache.spark.sql.catalyst.encoders.OuterScopes.addOuterScope(this);" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36msocketDF\u001b[39m: \u001b[32mDataFrame\u001b[39m = [value: string]" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val socketDF = spark.readStream\n", + " .format(\"socket\")\n", + " .option(\"host\", \"192.168.1.111\")\n", + " .option(\"port\", 8065)\n", + " .load()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[36mres10\u001b[39m: \u001b[32mBoolean\u001b[39m = true" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "socketDF.isStreaming " + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "root\n", + " |-- value: string (nullable = true)\n", + "\n" + ] + } + ], + "source": [ + "socketDF.printSchema" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[36mres12_1\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineModel\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.Pipeline\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.PipelineStage\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.classification.GBTClassifier\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.HashingTF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.IDF\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.RegexTokenizer\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StopWordsRemover\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.ml.feature.StringIndexer\n", + "\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36m$ivy.$ \n", + "\n", + "\u001b[39m\n", + "\u001b[36mres12_13\u001b[39m: \u001b[32mSparkSession\u001b[39m = org.apache.spark.sql.SparkSession@71b497ec\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.mllib.feature.Stemmer\u001b[39m" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import $ivy.`org.apache.spark::spark-mllib:2.4.3`\n", + "\n", + "NotebookSparkSession.sync()\n", + "\n", + "import org.apache.spark.ml.PipelineModel\n", + "import org.apache.spark.sql.functions._\n", + "\n", + "import org.apache.spark.ml.Pipeline\n", + "import org.apache.spark.ml.PipelineStage\n", + "import org.apache.spark.ml.classification.GBTClassifier\n", + "import org.apache.spark.ml.feature.HashingTF\n", + "import org.apache.spark.ml.feature.IDF\n", + "import org.apache.spark.ml.feature.RegexTokenizer\n", + "import org.apache.spark.ml.feature.StopWordsRemover\n", + "import org.apache.spark.ml.feature.StringIndexer\n", + "\n", + "import $ivy.`com.github.master:spark-stemming_2.10:0.2.1`\n", + "\n", + "NotebookSparkSession.sync()\n", + "\n", + "import org.apache.spark.mllib.feature.Stemmer" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + " " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " parquet at IDF.scala:180\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " head at IDF.scala:183\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " parquet at StringIndexer.scala:313\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " head at StringIndexer.scala:315\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " first at ReadWrite.scala:615\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " parquet at treeModels.scala:436\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " sortByKey at treeModels.scala:442\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " map at treeModels.scala:437\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at treeModels.scala:442\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " parquet at treeModels.scala:448\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " map at treeModels.scala:450\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " sortByKey at treeModels.scala:454\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " map at treeModels.scala:450\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " collect at treeModels.scala:454\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "\u001b[36mmodel\u001b[39m: \u001b[32mPipelineModel\u001b[39m = pipeline_fe9ff24e7f5b" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "val model = PipelineModel.read.load(\"model/\")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.functions._\n", + "\u001b[39m\n", + "\u001b[32mimport \u001b[39m\u001b[36morg.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}\n", + "\n", + "\u001b[39m\n", + "\u001b[36mschema\u001b[39m: \u001b[32mStructType\u001b[39m = \u001b[33mList\u001b[39m(\n", + " \u001b[33mStructField\u001b[39m(\u001b[32m\"id\"\u001b[39m, StringType, true, {}),\n", + " \u001b[33mStructField\u001b[39m(\u001b[32m\"text\"\u001b[39m, StringType, true, {})\n", + ")\n", + "\u001b[36mdfJSON\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, text: string]\n", + "\u001b[36mclean_data\u001b[39m: \u001b[32mDataFrame\u001b[39m = [id: string, text: string]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import org.apache.spark.sql.functions._\n", + "import org.apache.spark.sql.types.{StructType, StructField, StringType, IntegerType}\n", + "\n", + "val schema = new StructType()\n", + " .add(\"id\", StringType, true)\n", + " .add(\"text\", StringType, true)\n", + "\n", + "val dfJSON = socketDF.withColumn(\"jsonData\",from_json(col(\"value\"),schema))\n", + " .select(\"jsonData.*\")\n", + "\n", + "var clean_data = dfJSON.select(\"id\", \"text\")\n", + "clean_data = clean_data.na.fill(0)\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"n\\'t\", \" not\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'re\", \" are\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'s\", \" is\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'d\", \" would\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ll\", \" will\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'t\", \" not\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'ve\", \" have\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\'m\", \" am\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"won\\'t\", \"will not\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"can\\'t\", \"can not\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"<.*?>+\", \"\"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\\\W\", \" \"))\n", + "clean_data = clean_data.withColumn(\"text\", regexp_replace(col(\"text\"), \"\\n\", \"\"))\n", + "clean_data = clean_data.withColumn(\"text\", lower(col(\"text\")))" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "20/12/12 20:50:53 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeSystemBLAS\n", + "20/12/12 20:50:53 WARN BLAS: Failed to load implementation from: com.github.fommil.netlib.NativeRefBLAS\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 8 / 8\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + " start at cmd16.sc:10\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "
\n", + " 1 / 1\n", + "
\n", + "
\n", + "
\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "Interrupted!\n sun.misc.Unsafe.park(\u001b[32mNative Method\u001b[39m)\n java.util.concurrent.locks.LockSupport.park(\u001b[32mLockSupport.java\u001b[39m:\u001b[32m175\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.parkAndCheckInterrupt(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m836\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.doAcquireSharedInterruptibly(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m997\u001b[39m)\n java.util.concurrent.locks.AbstractQueuedSynchronizer.acquireSharedInterruptibly(\u001b[32mAbstractQueuedSynchronizer.java\u001b[39m:\u001b[32m1304\u001b[39m)\n java.util.concurrent.CountDownLatch.await(\u001b[32mCountDownLatch.java\u001b[39m:\u001b[32m231\u001b[39m)\n org.apache.spark.sql.execution.streaming.StreamExecution.awaitTermination(\u001b[32mStreamExecution.scala\u001b[39m:\u001b[32m467\u001b[39m)\n org.apache.spark.sql.execution.streaming.StreamingQueryWrapper.awaitTermination(\u001b[32mStreamingQueryWrapper.scala\u001b[39m:\u001b[32m53\u001b[39m)\n ammonite.$sess.cmd16$Helper.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m11\u001b[39m)\n ammonite.$sess.cmd16$.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m7\u001b[39m)\n ammonite.$sess.cmd16$.(\u001b[32mcmd16.sc\u001b[39m:\u001b[32m-1\u001b[39m)" + ] + } + ], + "source": [ + "model.transform(clean_data).select(\"id\", \"predictionTarget\")\n", + " .withColumn(\"target\", col(\"predictionTarget\").cast(IntegerType))\n", + " .drop(\"predictionTarget\")\n", + " .repartition(1).writeStream\n", + " .outputMode(\"append\")\n", + " .format(\"com.databricks.spark.csv\")\n", + " .option(\"path\", \"path/\")\n", + " .option(\"checkpointLocation\", \"checkpointLocation/\")\n", + " .option(\"header\", \"true\")\n", + " .start()\n", + " .awaitTermination()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Scala", + "language": "scala", + "name": "scala" + }, + "language_info": { + "codemirror_mode": "text/x-scala", + "file_extension": ".scala", + "mimetype": "text/x-scala", + "name": "scala", + "nbconvert_exporter": "script", + "version": "2.11.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +}