@@ -37,7 +37,6 @@ import org.apache.spark.api.java.function.MapFunction
 import org.apache.spark.api.java.function.ReduceFunction
 import org.apache.spark.rdd.RDD
 import org.apache.spark.sql.*
-import org.jetbrains.kotlinx.spark.extensions.KSparkExtensions
 import scala.Tuple2
 import scala.Tuple3
 import scala.Tuple4
@@ -49,7 +48,7 @@ import kotlin.reflect.KProperty1
  * Utility method to create dataset from list
  */
 inline fun <reified T> SparkSession.toDS(list: List<T>): Dataset<T> =
-    createDataset(list, encoder<T>())
+    createDataset(list, kotlinEncoderFor<T>())

 /**
  * Utility method to create dataframe from list
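For orientation, a minimal usage sketch of the `toDS` helper changed above; the live `spark: SparkSession` and the `Person` data class are assumptions for illustration, not part of this diff:

    data class Person(val name: String, val age: Int)

    // The helper derives the encoder via kotlinEncoderFor<Person>() internally.
    val people: Dataset<Person> = spark.toDS(listOf(Person("Alice", 30), Person("Bob", 25)))
    people.show()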
@@ -61,26 +60,26 @@ inline fun <reified T> SparkSession.toDF(list: List<T>, vararg colNames: String)
  * Utility method to create dataset from *array or vararg arguments
  */
 inline fun <reified T> SparkSession.dsOf(vararg t: T): Dataset<T> =
-    createDataset(t.toList(), encoder<T>())
+    createDataset(t.toList(), kotlinEncoderFor<T>())

 /**
  * Utility method to create dataframe from *array or vararg arguments
  */
 inline fun <reified T> SparkSession.dfOf(vararg t: T): Dataset<Row> =
-    createDataset(t.toList(), encoder<T>()).toDF()
+    createDataset(t.toList(), kotlinEncoderFor<T>()).toDF()

 /**
  * Utility method to create dataframe from *array or vararg arguments with given column names
  */
 inline fun <reified T> SparkSession.dfOf(colNames: Array<String>, vararg t: T): Dataset<Row> =
-    createDataset(t.toList(), encoder<T>())
+    createDataset(t.toList(), kotlinEncoderFor<T>())
         .run { if (colNames.isEmpty()) toDF() else toDF(*colNames) }

 /**
  * Utility method to create dataset from list
  */
 inline fun <reified T> List<T>.toDS(spark: SparkSession): Dataset<T> =
-    spark.createDataset(this, encoder<T>())
+    spark.createDataset(this, kotlinEncoderFor<T>())

 /**
  * Utility method to create dataframe from list
@@ -104,13 +103,13 @@ inline fun <reified T> Array<T>.toDF(spark: SparkSession, vararg colNames: Strin
  * Utility method to create dataset from RDD
  */
 inline fun <reified T> RDD<T>.toDS(spark: SparkSession): Dataset<T> =
-    spark.createDataset(this, encoder<T>())
+    spark.createDataset(this, kotlinEncoderFor<T>())

 /**
  * Utility method to create dataset from JavaRDD
  */
 inline fun <reified T> JavaRDDLike<T, *>.toDS(spark: SparkSession): Dataset<T> =
-    spark.createDataset(this.rdd(), encoder<T>())
+    spark.createDataset(this.rdd(), kotlinEncoderFor<T>())

 /**
  * Utility method to create Dataset<Row> (Dataframe) from JavaRDD.
@@ -132,37 +131,37 @@ inline fun <reified T> RDD<T>.toDF(spark: SparkSession, vararg colNames: String)
  * Returns a new Dataset that contains the result of applying [func] to each element.
  */
 inline fun <reified T, reified R> Dataset<T>.map(noinline func: (T) -> R): Dataset<R> =
-    map(MapFunction(func), encoder<R>())
+    map(MapFunction(func), kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
  * Returns a new Dataset by first applying a function to all elements of this Dataset,
  * and then flattening the results.
  */
 inline fun <T, reified R> Dataset<T>.flatMap(noinline func: (T) -> Iterator<R>): Dataset<R> =
-    flatMap(func, encoder<R>())
+    flatMap(func, kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
  * Returns a new Dataset by flattening. This means that a Dataset of an iterable such as
  * `listOf(listOf(1, 2, 3), listOf(4, 5, 6))` will be flattened to a Dataset of `listOf(1, 2, 3, 4, 5, 6)`.
  */
 inline fun <reified T, I : Iterable<T>> Dataset<I>.flatten(): Dataset<T> =
-    flatMap(FlatMapFunction { it.iterator() }, encoder<T>())
+    flatMap(FlatMapFunction { it.iterator() }, kotlinEncoderFor<T>())

 /**
  * (Kotlin-specific)
  * Returns a [KeyValueGroupedDataset] where the data is grouped by the given key [func].
  */
 inline fun <T, reified R> Dataset<T>.groupByKey(noinline func: (T) -> R): KeyValueGroupedDataset<R, T> =
-    groupByKey(MapFunction(func), encoder<R>())
+    groupByKey(MapFunction(func), kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
  * Returns a new Dataset that contains the result of applying [func] to each partition.
  */
 inline fun <T, reified R> Dataset<T>.mapPartitions(noinline func: (Iterator<T>) -> Iterator<R>): Dataset<R> =
-    mapPartitions(func, encoder<R>())
+    mapPartitions(func, kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
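A short sketch of how the typed transformations above read in practice; `spark` and `Person` are carried over from the earlier sketch, and the encoder for `List<Int>` is assumed to be supported by the library:

    // map infers the result encoder from R via kotlinEncoderFor<R>().
    val names: Dataset<String> = spark.toDS(listOf(Person("Alice", 30))).map { it.name }

    // groupByKey keys the dataset by the function result.
    val byParity: KeyValueGroupedDataset<Int, Int> = spark.dsOf(1, 2, 3, 4).groupByKey { it % 2 }

    // flatten turns a Dataset of iterables into a Dataset of their elements.
    val flat: Dataset<Int> = spark.dsOf(listOf(1, 2), listOf(3, 4)).flatten()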
@@ -193,15 +192,6 @@ inline fun <reified T1, T2> Dataset<Tuple2<T1, T2>>.takeKeys(): Dataset<T1> = ma
  */
 inline fun <reified T1, T2> Dataset<Pair<T1, T2>>.takeKeys(): Dataset<T1> = map { it.first }

-/**
- * (Kotlin-specific)
- * Maps the Dataset to only retain the "keys" or [Arity2._1] values.
- */
-@Suppress("DEPRECATION")
-@JvmName("takeKeysArity2")
-@Deprecated("Use Scala tuples instead.", ReplaceWith(""))
-inline fun <reified T1, T2> Dataset<Arity2<T1, T2>>.takeKeys(): Dataset<T1> = map { it._1 }
-
 /**
  * (Kotlin-specific)
  * Maps the Dataset to only retain the "values" or [Tuple2._2] values.
@@ -215,22 +205,13 @@ inline fun <T1, reified T2> Dataset<Tuple2<T1, T2>>.takeValues(): Dataset<T2> =
  */
 inline fun <T1, reified T2> Dataset<Pair<T1, T2>>.takeValues(): Dataset<T2> = map { it.second }

-/**
- * (Kotlin-specific)
- * Maps the Dataset to only retain the "values" or [Arity2._2] values.
- */
-@Suppress("DEPRECATION")
-@JvmName("takeValuesArity2")
-@Deprecated("Use Scala tuples instead.", ReplaceWith(""))
-inline fun <T1, reified T2> Dataset<Arity2<T1, T2>>.takeValues(): Dataset<T2> = map { it._2 }
-
 /** DEPRECATED: Use [as] or [to] for this. */
 @Deprecated(
     message = "Deprecated, since we already have `as`() and to().",
     replaceWith = ReplaceWith("this.to<R>()"),
     level = DeprecationLevel.ERROR,
 )
-inline fun <T, reified R> Dataset<T>.downcast(): Dataset<R> = `as`(encoder<R>())
+inline fun <T, reified R> Dataset<T>.downcast(): Dataset<R> = `as`(kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
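The `takeKeys`/`takeValues` projections kept above compose as follows; a sketch under the same assumed `spark` session:

    val pairs: Dataset<Pair<String, Int>> = spark.dsOf("a" to 1, "b" to 2)
    val keys: Dataset<String> = pairs.takeKeys()    // map { it.first }
    val values: Dataset<Int> = pairs.takeValues()   // map { it.second }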
@@ -252,7 +233,7 @@ inline fun <T, reified R> Dataset<T>.downcast(): Dataset<R> = `as`(encoder<R>())
  *
  * @see to as alias for [as]
  */
-inline fun <reified R> Dataset<*>.`as`(): Dataset<R> = `as`(encoder<R>())
+inline fun <reified R> Dataset<*>.`as`(): Dataset<R> = `as`(kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
@@ -274,7 +255,7 @@ inline fun <reified R> Dataset<*>.`as`(): Dataset<R> = `as`(encoder<R>())
  *
  * @see as as alias for [to]
  */
-inline fun <reified R> Dataset<*>.to(): Dataset<R> = `as`(encoder<R>())
+inline fun <reified R> Dataset<*>.to(): Dataset<R> = `as`(kotlinEncoderFor<R>())

 /**
  * (Kotlin-specific)
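A sketch of the reified conversion helpers; `df: Dataset<Row>` and the `IdName` data class are assumptions for illustration:

    data class IdName(val id: Long, val name: String)

    // Both helpers delegate to Dataset.as with kotlinEncoderFor<IdName>().
    val typed: Dataset<IdName> = df.to<IdName>()
    val same: Dataset<IdName> = df.`as`<IdName>()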
@@ -292,12 +273,16 @@ inline fun <reified T> Dataset<T>.forEachPartition(noinline func: (Iterator<T>)
 /**
  * It's hard to call `Dataset.debugCodegen` from kotlin, so here is utility for that
  */
-fun <T> Dataset<T>.debugCodegen(): Dataset<T> = also { KSparkExtensions.debugCodegen(it) }
+fun <T> Dataset<T>.debugCodegen(): Dataset<T> = also {
+    org.apache.spark.sql.execution.debug.`package$`.`MODULE$`.DebugQuery(it).debugCodegen()
+}

 /**
  * It's hard to call `Dataset.debug` from kotlin, so here is utility for that
  */
-fun <T> Dataset<T>.debug(): Dataset<T> = also {
+fun <T> Dataset<T>.debug(): Dataset<T> = also {
+    org.apache.spark.sql.execution.debug.`package$`.`MODULE$`.DebugQuery(it).debug()
+}


 /**
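Because both helpers return the receiver via `also`, they can be dropped into a pipeline without changing its result; a sketch with the assumed `people` dataset from earlier:

    people
        .debugCodegen()   // prints the generated code for the query plan (Spark's debugCodegen)
        .debug()          // runs the query with Spark's debug instrumentation
        .show()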
@@ -370,18 +355,6 @@ fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByKey(): Dataset<Tuple2<T1, T2>> = sort
 @JvmName("sortByTuple2Value")
 fun <T1, T2> Dataset<Tuple2<T1, T2>>.sortByValue(): Dataset<Tuple2<T1, T2>> = sort("_2")

-/** Returns a dataset sorted by the first (`_1`) value of each [Arity2] inside. */
-@Suppress("DEPRECATION")
-@Deprecated("Use Scala tuples instead.", ReplaceWith(""))
-@JvmName("sortByArity2Key")
-fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByKey(): Dataset<Arity2<T1, T2>> = sort("_1")
-
-/** Returns a dataset sorted by the second (`_2`) value of each [Arity2] inside. */
-@Suppress("DEPRECATION")
-@Deprecated("Use Scala tuples instead.", ReplaceWith(""))
-@JvmName("sortByArity2Value")
-fun <T1, T2> Dataset<Arity2<T1, T2>>.sortByValue(): Dataset<Arity2<T1, T2>> = sort("_2")
-
 /** Returns a dataset sorted by the first (`first`) value of each [Pair] inside. */
 @JvmName("sortByPairKey")
 fun <T1, T2> Dataset<Pair<T1, T2>>.sortByKey(): Dataset<Pair<T1, T2>> = sort("first")
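Finally, a sketch of the position-based sorts retained above, under the same assumed `spark` session:

    // Tuple2 datasets sort by column "_1"/"_2"; Pair datasets by "first".
    val sortedTuples = spark.dsOf(Tuple2("b", 2), Tuple2("a", 1)).sortByValue()
    val sortedPairs = spark.dsOf("b" to 2, "a" to 1).sortByKey()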