Skip to content

Commit

Permalink
Merge pull request #217 from huanshankeji/tolist-to-aslist
Browse files Browse the repository at this point in the history
Replace calls of `toList()` on `Array`s with `asList()` to improve the performance of the affected functions: `asList()` wraps the original array instead of copying its elements, which avoids one unnecessary copy
  • Loading branch information
Jolanrensen authored Jan 2, 2024
2 parents d787f9d + 48370b5 commit 34a9489
Show file tree
Hide file tree
Showing 4 changed files with 7 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,19 +61,19 @@ inline fun <reified T> SparkSession.toDF(list: List<T>, vararg colNames: String)
* Utility method to create dataset from *array or vararg arguments
*/
inline fun <reified T> SparkSession.dsOf(vararg t: T): Dataset<T> =
createDataset(t.toList(), encoder<T>())
createDataset(t.asList(), encoder<T>())

/**
* Utility method to create dataframe from *array or vararg arguments
*/
inline fun <reified T> SparkSession.dfOf(vararg t: T): Dataset<Row> =
createDataset(t.toList(), encoder<T>()).toDF()
createDataset(t.asList(), encoder<T>()).toDF()

/**
* Utility method to create dataframe from *array or vararg arguments with given column names
*/
inline fun <reified T> SparkSession.dfOf(colNames: Array<String>, vararg t: T): Dataset<Row> =
createDataset(t.toList(), encoder<T>())
createDataset(t.asList(), encoder<T>())
.run { if (colNames.isEmpty()) toDF() else toDF(*colNames) }

/**
Expand All @@ -92,7 +92,7 @@ inline fun <reified T> List<T>.toDF(spark: SparkSession, vararg colNames: String
* Utility method to create dataset from array
*/
inline fun <reified T> Array<T>.toDS(spark: SparkSession): Dataset<T> =
toList().toDS(spark)
asList().toDS(spark)

/**
* Utility method to create dataframe from list
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ import java.io.Serializable
fun <T> JavaSparkContext.rddOf(
vararg elements: T,
numSlices: Int = defaultParallelism(),
): JavaRDD<T> = parallelize(elements.toList(), numSlices)
): JavaRDD<T> = parallelize(elements.asList(), numSlices)

/**
* Utility method to create an RDD from a list.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ class KSparkSession(val spark: SparkSession) {
* NOTE: [T] must be [Serializable].
*/
fun <T> rddOf(vararg elements: T, numSlices: Int = sc.defaultParallelism()): JavaRDD<T> =
sc.toRDD(elements.toList(), numSlices)
sc.toRDD(elements.asList(), numSlices)

/**
* A collection of methods for registering user-defined functions (UDF).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -215,7 +215,7 @@ class TypeInferenceTest : ShouldSpec({
should("generate valid serializer schema") {
expect(encoder<Sample>().schema()) {
this
.feature("data type", { this.fields()?.toList() }) {
.feature("data type", { this.fields()?.asList() }) {
this.notToEqualNull().toContain.inOrder.only.entry {
this
.feature("element name", { name() }) { toEqual("optionList") }
Expand Down

0 comments on commit 34a9489

Please sign in to comment.