Merge pull request #36 from lyft/v3.1.2-rc1_branch
Spark 3.1.2-RC1
catalinii authored May 25, 2021
2 parents bf60b67 + de351e3 commit 0ae05fa
Showing 539 changed files with 27,468 additions and 5,678 deletions.
116 changes: 95 additions & 21 deletions .github/workflows/build_and_test.yml
@@ -79,10 +79,9 @@ jobs:
INCLUDED_TAGS: ${{ matrix.included-tags }}
HADOOP_PROFILE: ${{ matrix.hadoop }}
HIVE_PROFILE: ${{ matrix.hive }}
# GitHub Actions' default miniconda to use in pip packaging test.
CONDA_PREFIX: /usr/share/miniconda
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
@@ -166,10 +165,9 @@ jobs:
MODULES_TO_TEST: ${{ matrix.modules }}
HADOOP_PROFILE: hadoop3.2
HIVE_PROFILE: hive2.3
# GitHub Actions' default miniconda to use in pip packaging test.
CONDA_PREFIX: /usr/share/miniconda
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
@@ -211,9 +209,14 @@ jobs:
run: |
python3.6 -m pip install numpy 'pyarrow<3.0.0' pandas scipy xmlrunner
python3.6 -m pip list
- name: Install Conda for pip packaging test
run: |
curl -s https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh > miniconda.sh
bash miniconda.sh -b -p $HOME/miniconda
# Run the tests.
- name: Run tests
run: |
export PATH=$PATH:$HOME/miniconda/bin
./dev/run-tests --parallelism 2 --modules "$MODULES_TO_TEST"
- name: Upload test results to report
if: always()
@@ -238,6 +241,7 @@ jobs:
HIVE_PROFILE: hive2.3
GITHUB_PREV_SHA: ${{ github.event.before }}
GITHUB_INPUT_BRANCH: ${{ github.event.inputs.target }}
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
@@ -285,6 +289,9 @@ jobs:
lint:
name: Linters, licenses, dependencies and documentation generation
runs-on: ubuntu-20.04
env:
LC_ALL: C.UTF-8
LANG: C.UTF-8
container:
image: dongjoon/apache-spark-github-action-image:20201025
steps:
@@ -326,20 +333,24 @@ jobs:
run: |
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc
# Jinja2 3.0.0+ causes error when building with Sphinx.
# See also https://issues.apache.org/jira/browse/SPARK-35375.
python3.6 -m pip install flake8 'sphinx<3.1.0' numpy pydata_sphinx_theme ipython nbsphinx mypy numpydoc 'jinja2<3.0.0'
- name: Install R linter dependencies and SparkR
run: |
apt-get install -y libcurl4-openssl-dev libgit2-dev libssl-dev libxml2-dev
Rscript -e "install.packages(c('devtools'), repos='https://cloud.r-project.org/')"
Rscript -e "devtools::install_github('jimhester/lintr@v2.0.0')"
Rscript -e "devtools::install_github('jimhester/lintr@v2.0.1')"
./R/install-dev.sh
- name: Install dependencies for documentation generation
run: |
# pandoc is required to generate PySpark APIs as well in nbsphinx.
apt-get install -y libcurl4-openssl-dev pandoc
# TODO(SPARK-32407): Sphinx 3.1+ does not correctly index nested classes.
# See also https://github.com/sphinx-doc/sphinx/issues/7551.
python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc
# Jinja2 3.0.0+ causes error when building with Sphinx.
# See also https://issues.apache.org/jira/browse/SPARK-35375.
python3.6 -m pip install 'sphinx<3.1.0' mkdocs numpy pydata_sphinx_theme ipython nbsphinx numpydoc 'jinja2<3.0.0'
apt-get update -y
apt-get install -y ruby ruby-dev
gem install jekyll jekyll-redirect-from rouge
@@ -359,8 +370,6 @@ jobs:
- name: Run documentation build
run: |
cd docs
export LC_ALL=C.UTF-8
export LANG=C.UTF-8
jekyll build
java-11:
@@ -369,6 +378,17 @@ jobs:
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Maven local repository
uses: actions/cache@v2
with:
@@ -388,45 +408,99 @@ jobs:
./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Djava.version=11 install
rm -rf ~/.m2/repository/org/apache/spark
scala-213:
name: Scala 2.13 build with SBT
hadoop-2:
name: Hadoop 2 build with SBT
runs-on: ubuntu-20.04
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: scala-213-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
scala-213-coursier-
hadoop-2-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
run: |
./dev/change-scala-version.sh 2.13
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Pdocker-integration-tests -Pkubernetes-integration-tests -Pspark-ganglia-lgpl -Pscala-2.13 compile test:compile
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
hadoop-2:
name: Hadoop 2 build with SBT
tpcds-1g:
name: Run TPC-DS queries with SF=1
runs-on: ubuntu-20.04
env:
SPARK_LOCAL_IP: localhost
steps:
- name: Checkout Spark repository
uses: actions/checkout@v2
- name: Cache Scala, SBT and Maven
uses: actions/cache@v2
with:
path: |
build/apache-maven-*
build/scala-*
build/*.jar
~/.sbt
key: build-${{ hashFiles('**/pom.xml', 'project/build.properties', 'build/mvn', 'build/sbt', 'build/sbt-launch-lib.bash', 'build/spark-build-info') }}
restore-keys: |
build-
- name: Cache Coursier local repository
uses: actions/cache@v2
with:
path: ~/.cache/coursier
key: hadoop-2-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
key: tpcds-coursier-${{ hashFiles('**/pom.xml', '**/plugins.sbt') }}
restore-keys: |
hadoop-2-coursier-
tpcds-coursier-
- name: Install Java 8
uses: actions/setup-java@v1
with:
java-version: 8
- name: Build with SBT
- name: Cache TPC-DS generated data
id: cache-tpcds-sf-1
uses: actions/cache@v2
with:
path: ./tpcds-sf-1
key: tpcds-${{ hashFiles('.github/workflows/build_and_test.yml', 'sql/core/src/test/scala/org/apache/spark/sql/TPCDSSchema.scala') }}
- name: Checkout tpcds-kit repository
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
uses: actions/checkout@v2
with:
repository: databricks/tpcds-kit
ref: 2a5078a782192ddb6efbcead8de9973d6ab4f069
path: ./tpcds-kit
- name: Build tpcds-kit
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: cd tpcds-kit/tools && make OS=LINUX
- name: Generate TPC-DS (SF=1) table data
if: steps.cache-tpcds-sf-1.outputs.cache-hit != 'true'
run: build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData --dsdgenDir `pwd`/tpcds-kit/tools --location `pwd`/tpcds-sf-1 --scaleFactor 1 --numPartitions 1 --overwrite"
- name: Run TPC-DS queries
run: |
./build/sbt -Pyarn -Pmesos -Pkubernetes -Phive -Phive-thriftserver -Phadoop-cloud -Pkinesis-asl -Phadoop-2.7 compile test:compile
SPARK_TPCDS_DATA=`pwd`/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
- name: Upload test results to report
if: always()
uses: actions/upload-artifact@v2
with:
name: test-results-tpcds--8-hadoop3.2-hive2.3
path: "**/target/test-reports/*.xml"
- name: Upload unit tests log files
if: failure()
uses: actions/upload-artifact@v2
with:
name: unit-tests-log-tpcds--8-hadoop3.2-hive2.3
path: "**/target/unit-tests.log"
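For reference, the new tpcds-1g job above can be reproduced locally with roughly the following commands — a sketch assuming a Linux shell with git, make, a C compiler, and JDK 8 available, run from the Spark source root (the sbt invocations are the same ones the workflow uses):

```bash
# Fetch and build dsdgen from databricks/tpcds-kit at the ref the workflow pins
git clone https://github.com/databricks/tpcds-kit.git
(cd tpcds-kit && git checkout 2a5078a782192ddb6efbcead8de9973d6ab4f069 && cd tools && make OS=LINUX)

# Generate the scale-factor-1 data set into ./tpcds-sf-1
build/sbt "sql/test:runMain org.apache.spark.sql.GenTPCDSData \
  --dsdgenDir $(pwd)/tpcds-kit/tools --location $(pwd)/tpcds-sf-1 \
  --scaleFactor 1 --numPartitions 1 --overwrite"

# Point the query test suite at the generated data and run it
SPARK_TPCDS_DATA=$(pwd)/tpcds-sf-1 build/sbt "sql/testOnly org.apache.spark.sql.TPCDSQueryTestSuite"
```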
1 change: 1 addition & 0 deletions .github/workflows/test_report.yml
@@ -15,6 +15,7 @@ jobs:
github_token: ${{ secrets.GITHUB_TOKEN }}
workflow: ${{ github.event.workflow_run.workflow_id }}
commit: ${{ github.event.workflow_run.head_commit.id }}
workflow_conclusion: completed
- name: Publish test report
uses: scacap/action-surefire-report@v1
with:
17 changes: 0 additions & 17 deletions .sbtopts

This file was deleted.

2 changes: 1 addition & 1 deletion R/DOCUMENTATION.md
@@ -19,7 +19,7 @@ license: |
# SparkR Documentation

SparkR documentation is generated by using in-source comments and annotated by using
[`roxygen2`](https://cran.r-project.org/web/packages/roxygen2/index.html). After making changes to the documentation and generating man pages,
[`roxygen2`](https://cran.r-project.org/package=roxygen2). After making changes to the documentation and generating man pages,
you can run the following from an R console in the SparkR home directory
```R
library(devtools)
```
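For context, a minimal sketch of how the SparkR man pages are typically regenerated from those roxygen2 comments — assuming devtools and roxygen2 are installed; this is an illustration, not necessarily the exact command the file goes on to show:

```R
library(devtools)

# Rebuild the Rd man pages under R/pkg/man from the in-source roxygen2 comments
devtools::document(pkg = "./pkg", roclets = c("rd"))
```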
7 changes: 4 additions & 3 deletions R/pkg/DESCRIPTION
@@ -1,13 +1,13 @@
Package: SparkR
Type: Package
Version: 3.1.1
Version: 3.1.2
Title: R Front End for 'Apache Spark'
Description: Provides an R Front end for 'Apache Spark' <https://spark.apache.org>.
Authors@R: c(person("Shivaram", "Venkataraman", role = c("aut", "cre"),
Authors@R: c(person("Shivaram", "Venkataraman", role = "aut",
email = "shivaram@cs.berkeley.edu"),
person("Xiangrui", "Meng", role = "aut",
email = "meng@databricks.com"),
person("Felix", "Cheung", role = "aut",
person("Felix", "Cheung", role = c("aut", "cre"),
email = "felixcheung@apache.org"),
person(family = "The Apache Software Foundation", role = c("aut", "cph")))
License: Apache License (== 2.0)
@@ -20,6 +20,7 @@ Depends:
Suggests:
knitr,
rmarkdown,
markdown,
testthat,
e1071,
survival,
7 changes: 6 additions & 1 deletion R/pkg/R/functions.R
@@ -3578,7 +3578,12 @@ unresolved_named_lambda_var <- function(...) {
"org.apache.spark.sql.Column",
newJObject(
"org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
list(...)
lapply(list(...), function(x) {
handledCallJStatic(
"org.apache.spark.sql.catalyst.expressions.UnresolvedNamedLambdaVariable",
"freshVarName",
x)
})
)
)
column(jc)
2 changes: 1 addition & 1 deletion R/pkg/R/mllib_classification.R
@@ -574,7 +574,7 @@ setMethod("write.ml", signature(object = "MultilayerPerceptronClassificationMode
#' @rdname spark.naiveBayes
#' @aliases spark.naiveBayes,SparkDataFrame,formula-method
#' @name spark.naiveBayes
#' @seealso e1071: \url{https://cran.r-project.org/web/packages/e1071/index.html}
#' @seealso e1071: \url{https://cran.r-project.org/package=e1071}
#' @examples
#' \dontrun{
#' data <- as.data.frame(UCBAdmissions)
4 changes: 2 additions & 2 deletions R/pkg/R/mllib_clustering.R
@@ -204,7 +204,7 @@ setMethod("write.ml", signature(object = "BisectingKMeansModel", path = "charact
#' @return \code{spark.gaussianMixture} returns a fitted multivariate gaussian mixture model.
#' @rdname spark.gaussianMixture
#' @name spark.gaussianMixture
#' @seealso mixtools: \url{https://cran.r-project.org/web/packages/mixtools/index.html}
#' @seealso mixtools: \url{https://cran.r-project.org/package=mixtools}
#' @examples
#' \dontrun{
#' sparkR.session()
@@ -483,7 +483,7 @@ setMethod("write.ml", signature(object = "KMeansModel", path = "character"),
#' @return \code{spark.lda} returns a fitted Latent Dirichlet Allocation model.
#' @rdname spark.lda
#' @aliases spark.lda,SparkDataFrame-method
#' @seealso topicmodels: \url{https://cran.r-project.org/web/packages/topicmodels/index.html}
#' @seealso topicmodels: \url{https://cran.r-project.org/package=topicmodels}
#' @examples
#' \dontrun{
#' text <- read.df("data/mllib/sample_lda_libsvm_data.txt", source = "libsvm")
2 changes: 1 addition & 1 deletion R/pkg/R/mllib_regression.R
@@ -475,7 +475,7 @@ setMethod("write.ml", signature(object = "IsotonicRegressionModel", path = "char
#' @param ... additional arguments passed to the method.
#' @return \code{spark.survreg} returns a fitted AFT survival regression model.
#' @rdname spark.survreg
#' @seealso survival: \url{https://cran.r-project.org/web/packages/survival/index.html}
#' @seealso survival: \url{https://cran.r-project.org/package=survival}
#' @examples
#' \dontrun{
#' df <- createDataFrame(ovarian)
14 changes: 14 additions & 0 deletions R/pkg/tests/fulltests/test_sparkSQL.R
@@ -2153,6 +2153,20 @@ test_that("higher order functions", {
expect_error(array_transform("xs", function(...) 42))
})

test_that("SPARK-34794: lambda vars must be resolved properly in nested higher order functions", {
df <- sql("SELECT array(1, 2, 3) as numbers, array('a', 'b', 'c') as letters")
ret <- first(select(
df,
array_transform("numbers", function(number) {
array_transform("letters", function(latter) {
struct(alias(number, "n"), alias(latter, "l"))
})
})
))

expect_equal(1, ret[[1]][[1]][[1]][[1]]$n)
})

test_that("group by, agg functions", {
df <- read.json(jsonPath)
df1 <- agg(df, name = "max", age = "sum")
2 changes: 1 addition & 1 deletion assembly/pom.xml
@@ -21,7 +21,7 @@
<parent>
<groupId>org.apache.spark</groupId>
<artifactId>spark-parent_2.12</artifactId>
<version>3.1.1</version>
<version>3.1.2</version>
<relativePath>../pom.xml</relativePath>
</parent>
