Merged branch 2.3.X

jpmml · Dec 29, 2023 · 594fec9 · 594fec9
2 parents 8ca0004 + 87ab2ef
commit 594fec9
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 9 deletions.
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
@@ -2,7 +2,7 @@ name: maven
 
 on:
   push:
-    branches: [ '2.0.X', '2.1.X', '2.2.X', '2.3.X', master ]
+    branches: [ '2.0.X', '2.1.X', '2.2.X', '2.3.X', '2.4.X', master ]
 
 jobs:
   build:

diff --git a/README.md b/README.md
@@ -45,6 +45,8 @@ Java library and command-line application for converting Apache Spark ML pipelin
 <details>
   <summary>Apache Spark ML</summary>
 
+  Examples: [main.py](https://github.com/jpmml/jpmml-sparkml/blob/2.4.X/pmml-sparkml/src/test/resources/main.py)
+
   * Feature extractors, transformers and selectors:
     * [`feature.Binarizer`](https://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/feature/Binarizer.html)
     * [`feature.Bucketizer`](https://spark.apache.org/docs/latest/api/java/org/apache/spark/ml/feature/Bucketizer.html)
@@ -120,6 +122,8 @@ Java library and command-line application for converting Apache Spark ML pipelin
 <details>
   <summary>LightGBM</summary>
 
+  Examples: [LightGBMAuditNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.4.X/pmml-sparkml-lightgbm/src/test/resources/LightGBMAuditNA.scala), [LightGBMAutoNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.4.X/pmml-sparkml-lightgbm/src/test/resources/LightGBMAutoNA.scala), etc.
+
   * Prediction models:
     * [`com.microsoft.azure.synapse.ml.lightgbm.LightGBMClassificationModel`](https://mmlspark.blob.core.windows.net/docs/0.9.5/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMClassificationModel.html)
     * [`com.microsoft.azure.synapse.ml.lightgbm.LightGBMRegressionModel`](https://mmlspark.blob.core.windows.net/docs/0.9.5/scala/com/microsoft/azure/synapse/ml/lightgbm/LightGBMRegressionModel.html)
@@ -128,14 +132,16 @@ Java library and command-line application for converting Apache Spark ML pipelin
 <details>
   <summary>XGBoost</summary>
 
+  Examples: [XGBoostAuditNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.4.X/pmml-sparkml-xgboost/src/test/resources/XGBoostAuditNA.scala), [XGBoostAutoNA.scala](https://github.com/jpmml/jpmml-sparkml/blob/2.4.X/pmml-sparkml-xgboost/src/test/resources/XGBoostAutoNA.scala), etc.
+
   * Prediction models:
     * [`ml.dmlc.xgboost4j.scala.spark.XGBoostClassificationModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostClassificationModel.html)
     * [`ml.dmlc.xgboost4j.scala.spark.XGBoostRegressionModel`](https://xgboost.readthedocs.io/en/latest/jvm/scaladocs/xgboost4j-spark/ml/dmlc/xgboost4j/scala/spark/XGBoostRegressionModel.html)
 </details>
 
 # Prerequisites #
 
-* Apache Spark 3.0.X, 3.1.X, 3.2.X, 3.3.X or 3.4.X.
+* Apache Spark 3.0.X, 3.1.X, 3.2.X, 3.3.X, 3.4.X or 3.5.X.
 
 # Installation #
 
@@ -163,7 +169,8 @@ Active development branches:
 | 3.1.X | [`2.1.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.1.X) |
 | 3.2.X | [`2.2.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.2.X) |
 | 3.3.X | [`2.3.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.3.X) |
-| 3.4.X | [`master`](https://github.com/jpmml/jpmml-sparkml/tree/master) |
+| 3.4.X | [`2.4.X`](https://github.com/jpmml/jpmml-sparkml/tree/2.4.X) |
+| 3.5.X | [`master`](https://github.com/jpmml/jpmml-sparkml/tree/master) |
 
 Archived development branches:
 

diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostHousing.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostRegressor}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostRegressor
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.sql.types.FloatType
@@ -16,8 +16,7 @@ val cont_cols = Array("CRIM", "ZN", "INDUS", "NOX", "RM", "AGE", "DIS", "PTRATIO
 val assembler = new VectorAssembler().setInputCols(cat_cols ++ cont_cols).setOutputCol("featureVector")
 val indexer = new VectorIndexer().setInputCol(assembler.getOutputCol).setOutputCol("catFeatureVector")
 
-val trackerConf = TrackerConf(0, "scala")
-val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101, "num_workers" -> 1, "tracker_conf" -> trackerConf)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)
+val regressor = new XGBoostRegressor(Map("objective" -> "reg:squarederror", "num_round" -> 101)).setMissing(-1).setLabelCol("MEDV").setFeaturesCol(indexer.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(assembler, indexer, regressor))
 val pipelineModel = pipeline.fit(df)

diff --git a/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala b/pmml-sparkml-xgboost/src/test/resources/XGBoostIris.scala
@@ -1,6 +1,6 @@
 import java.io.File
 
-import ml.dmlc.xgboost4j.scala.spark.{TrackerConf, XGBoostClassifier}
+import ml.dmlc.xgboost4j.scala.spark.XGBoostClassifier
 import org.apache.spark.ml.Pipeline
 import org.apache.spark.ml.feature._
 import org.apache.spark.ml.linalg.Vector
@@ -22,8 +22,7 @@ val labelIndexerModel = labelIndexer.fit(df)
 
 val assembler = new VectorAssembler().setInputCols(Array("Sepal_Length", "Sepal_Width", "Petal_Length", "Petal_Width")).setOutputCol("featureVector")
 
-val trackerConf = TrackerConf(0, "scala")
-val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3, "num_round" -> 17, "tracker_conf" -> trackerConf)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)
+val classifier = new XGBoostClassifier(Map("objective" -> "multi:softprob", "num_class" -> 3)).setLabelCol(labelIndexer.getOutputCol).setFeaturesCol(assembler.getOutputCol)
 
 val pipeline = new Pipeline().setStages(Array(labelIndexer, assembler, classifier))
 val pipelineModel = pipeline.fit(df)